!pip install datasets torch contractions textblob kaleido
import pandas as pd
import re, string, unicodedata
from bs4 import BeautifulSoup
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer, PorterStemmer
from nltk import pos_tag, ne_chunk
from nltk.chunk import tree2conlltags
# Load HuggingFace datasets
from datasets import load_dataset
# Ensure you have the required NLTK resources
import nltk
# Download the Punkt tokenizer models.
# Punkt is a pre-trained tokenizer that divides text into sentences and words.
# This model is useful for tasks such as sentence splitting and word tokenization.
nltk.download('punkt')
# Download a set of common stopwords.
# Stopwords are common words (like "and", "the", "is") that are often
# filtered out in text processing because they are too frequent to
# provide meaningful information. This resource helps with removing or ignoring
# these words during text analysis.
nltk.download('stopwords')
# Download the WordNet lexical database.
# WordNet is a large lexical database of English that groups words into sets of
# synonyms (synsets) and provides various relations between them.
# It is used for tasks like word sense disambiguation, synonyms and
# antonyms finding, and semantic analysis.
nltk.download('wordnet')
# Download the Average Perceptron Tagger for POS Tagging
nltk.download('averaged_perceptron_tagger')
# Download the Maximum Entropy Named Entity Chunker to identify and classify
# named entities in text, such as names of people, organizations, or locations.
nltk.download('maxent_ne_chunker')
# Download a corpus of words that can be used as a reference for various
# NLP tasks, such as checking if a word exists or finding synonyms.
nltk.download('words')
# Import WordCloud to visualize text data
from wordcloud import WordCloud
# Visualization plots library
import matplotlib.pyplot as plt
# Import displacy from spacy library.
# This is to visualize the processed text data post NER and POS Tagging.
import spacy
from spacy import displacy
from spacy.tokens import Span
# Import stopwords
from nltk.corpus import stopwords
# Import Tokenizer
from nltk.tokenize import word_tokenize, sent_tokenize
# Import Lemmatizer
from nltk.stem.wordnet import WordNetLemmatizer
# Visualize data in graphical formats
import matplotlib.pyplot as plt
import seaborn as sns
# To count frequencies of words
from textblob import TextBlob
from collections import Counter
# Ignore the warnings
import warnings
warnings.filterwarnings("ignore")
Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (3.0.1) Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.4.1+cu121) Requirement already satisfied: contractions in /usr/local/lib/python3.10/dist-packages (0.1.73) Requirement already satisfied: textblob in /usr/local/lib/python3.10/dist-packages (0.17.1) Requirement already satisfied: kaleido in /usr/local/lib/python3.10/dist-packages (0.2.1) Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets) (3.16.1) Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.26.4) Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (16.1.0) Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.3.8) Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (2.2.2) Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.32.3) Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.5) Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.5.0) Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from datasets) (0.70.16) Requirement already satisfied: fsspec<=2024.6.1,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from fsspec[http]<=2024.6.1,>=2023.1.0->datasets) (2024.6.1) Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.10.10) Requirement already satisfied: huggingface-hub>=0.22.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.24.7) Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (24.1) Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.2) Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch) (4.12.2) Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.13.3) Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.4.1) Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.4) Requirement already satisfied: textsearch>=0.0.21 in /usr/local/lib/python3.10/dist-packages (from contractions) (0.0.24) Requirement already satisfied: nltk>=3.1 in /usr/local/lib/python3.10/dist-packages (from textblob) (3.8.1) Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (2.4.3) Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1) Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (24.2.0) Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.1) Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.1.0) Requirement already satisfied: yarl<2.0,>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.15.2) Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3) Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk>=3.1->textblob) (8.1.7) Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk>=3.1->textblob) (1.4.2) Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk>=3.1->textblob) (2024.9.11) Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (3.4.0) Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (3.10) Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (2.2.3) Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (2024.8.30) Requirement already satisfied: anyascii in /usr/local/lib/python3.10/dist-packages (from textsearch>=0.0.21->contractions) (0.3.2) Requirement already satisfied: pyahocorasick in /usr/local/lib/python3.10/dist-packages (from textsearch>=0.0.21->contractions) (2.1.0) Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (3.0.1) Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2) Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2) Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2) Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0) Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0) Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from yarl<2.0,>=1.12.0->aiohttp->datasets) (0.2.0)
[nltk_data] Downloading package punkt to /root/nltk_data... [nltk_data] Package punkt is already up-to-date! [nltk_data] Downloading package stopwords to /root/nltk_data... [nltk_data] Package stopwords is already up-to-date! [nltk_data] Downloading package wordnet to /root/nltk_data... [nltk_data] Package wordnet is already up-to-date! [nltk_data] Downloading package averaged_perceptron_tagger to [nltk_data] /root/nltk_data... [nltk_data] Package averaged_perceptron_tagger is already up-to- [nltk_data] date! [nltk_data] Downloading package maxent_ne_chunker to [nltk_data] /root/nltk_data... [nltk_data] Package maxent_ne_chunker is already up-to-date! [nltk_data] Downloading package words to /root/nltk_data... [nltk_data] Package words is already up-to-date!
train_data = pd.read_json('train-v1.1.json')
val_data = pd.read_json('dev-v1.1.json')
def convert_json_to_df(squad_data):
columns = ['context', 'question', 'answer', 'ans_start', 'ans_end']
dataset = pd.DataFrame(columns=columns)
i = 0
for value in squad_data['data']:
for data in value['paragraphs']:
for qas in data['qas']:
answer = qas['answers'][0]['text'] if qas['answers'] else "No answer available."
answer_start = qas['answers'][0]['answer_start'] if qas['answers'] else -1
dataset.loc[i] =[data['context'], qas['question'], answer, answer_start, 0]
i += 1
return dataset
train_df = convert_json_to_df(train_data)
val_df = convert_json_to_df(val_data)
As part of cleaning the text, we performed the following steps:
\n and carriage returns \r. Removed them.def clean_text(text):
# Lowercase the text (since we're using an uncased model)
text = text.lower()
# Remove newline and carriage returns
text = text.replace('\n', ' ').replace('\r', ' ')
# Replace multiple spaces with a single space
text = re.sub(r'\s+', ' ', text)
# Remove leading and trailing whitespace
text = text.strip()
return text
def clean_text_data(dataset: pd.DataFrame):
dataset[['context', 'question', 'answer']] = dataset[['context', 'question', 'answer']].map(clean_text)
# Call the cleanData function for both train_df and val_df
clean_text_data(train_df)
clean_text_data(val_df)
train_df.to_csv("squad_cleaned_train.csv", index=False)
val_df.to_csv("squad_cleaned_validation.csv", index=False)
print("CSV files have been created and saved")
CSV files have been created and saved
merged_squad_df = pd.concat([train_df, val_df])
merged_df_path = "squad_extracted_merged.csv"
# Save merged DataFrame to CSV file
merged_squad_df.to_csv(merged_df_path, index=False)
print(f"Merged dataframe has been saved to - {merged_df_path}")
Merged dataframe has been saved to - squad_extracted_merged.csv
# First 5 rows of the dataframe
merged_squad_df.head()
| context | question | answer | ans_start | ans_end | |
|---|---|---|---|---|---|
| 0 | architecturally, the school has a catholic cha... | to whom did the virgin mary allegedly appear i... | saint bernadette soubirous | 515 | 0 |
| 1 | architecturally, the school has a catholic cha... | what is in front of the notre dame main building? | a copper statue of christ | 188 | 0 |
| 2 | architecturally, the school has a catholic cha... | the basilica of the sacred heart at notre dame... | the main building | 279 | 0 |
| 3 | architecturally, the school has a catholic cha... | what is the grotto at notre dame? | a marian place of prayer and reflection | 381 | 0 |
| 4 | architecturally, the school has a catholic cha... | what sits on top of the main building at notre... | a golden statue of the virgin mary | 92 | 0 |
# Last 5 rows of the dataframe
merged_squad_df.tail()
| context | question | answer | ans_start | ans_end | |
|---|---|---|---|---|---|
| 10565 | the pound-force has a metric counterpart, less... | what is the metric term less used than the new... | kilogram-force | 82 | 0 |
| 10566 | the pound-force has a metric counterpart, less... | what is the kilogram-force sometimes reffered ... | kilopond | 114 | 0 |
| 10567 | the pound-force has a metric counterpart, less... | what is a very seldom used unit of mass in the... | slug | 274 | 0 |
| 10568 | the pound-force has a metric counterpart, less... | what seldom used term of a unit of force equal... | kip | 712 | 0 |
| 10569 | the pound-force has a metric counterpart, less... | what is the seldom used force unit equal to on... | sthène | 665 | 0 |
# Shape of the dataframe
merged_squad_df.shape
(98169, 5)
# Columns of the dataframe
merged_squad_df.columns
Index(['context', 'question', 'answer', 'ans_start', 'ans_end'], dtype='object')
# Access the first row for sample view of the data
first_row = merged_squad_df.iloc[20]
# Print each value separately
print(f"Context: {first_row['context']}\n")
print(f"Question: {first_row['question']}\n")
print(f"Answer: {first_row['answer']}")
Context: all of notre dame's undergraduate students are a part of one of the five undergraduate colleges at the school or are in the first year of studies program. the first year of studies program was established in 1962 to guide incoming freshmen in their first year at the school before they have declared a major. each student is given an academic advisor from the program who helps them to choose classes that give them exposure to any major in which they are interested. the program also includes a learning resource center which provides time management, collaborative learning, and subject tutoring. this program has been recognized previously, by u.s. news & world report, as outstanding. Question: what entity provides help with the management of time for new students at notre dame? Answer: learning resource center
# Fetch information about the dataframe
merged_squad_df.info()
<class 'pandas.core.frame.DataFrame'> Index: 98169 entries, 0 to 10569 Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 context 98169 non-null object 1 question 98169 non-null object 2 answer 98169 non-null object 3 ans_start 98169 non-null int64 4 ans_end 98169 non-null int64 dtypes: int64(2), object(3) memory usage: 4.5+ MB
# Examine the datatypes of each column
merged_squad_df.dtypes
| 0 | |
|---|---|
| context | object |
| question | object |
| answer | object |
| ans_start | int64 |
| ans_end | int64 |
# Calculate the number of unique values in each column
unique_values = {column: len(merged_squad_df[column].unique()) for column in merged_squad_df.columns}
# Prepare the figure with two subplots in a single row
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
# Plotting the bar plot for unique values in each column (subplot 1)
sns.barplot(x=list(unique_values.keys()), y=list(unique_values.values()), ax=axes[0], palette="viridis")
axes[0].set_title('Unique Values per Column')
axes[0].set_ylabel('Number of Unique Values')
axes[0].set_xlabel('Columns')
# Adding the count labels on top of the bars for the first plot
for i, (column, count) in enumerate(unique_values.items()):
axes[0].text(i, count, str(count), ha='center', va='bottom')
# Plotting a different plot type (e.g., count plot for demonstration) (subplot 2)
count_data = merged_squad_df.melt()['variable'].value_counts()
sns.barplot(x=count_data.index, y=count_data.values, ax=axes[1], palette="plasma")
axes[1].set_title('Count of Values per Column')
axes[1].set_ylabel('Count')
axes[1].set_xlabel('Columns')
# Adding the count labels on top of the bars for the second plot
for i, count in enumerate(count_data.values):
axes[1].text(i, count, str(count), ha='center', va='bottom')
# Adjust layout
plt.tight_layout()
plt.show()
# Get a summary of descriptive statistics for the numerical columns of the dataframe
merged_squad_df.describe().T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| ans_start | 98169.0 | 320.756777 | 268.44539 | 0.0 | 111.0 | 262.0 | 469.0 | 3126.0 |
| ans_end | 98169.0 | 0.000000 | 0.00000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
# Examine the datatypes of each column
merged_squad_df.dtypes
| 0 | |
|---|---|
| context | object |
| question | object |
| answer | object |
| ans_start | int64 |
| ans_end | int64 |
import plotly.express as px
# Check if there are duplicate rows
pd.set_option('display.max_colwidth', None)
duplicate_rows = merged_squad_df[merged_squad_df.duplicated()]
if not duplicate_rows.empty:
# Create a sunburst plot with increased size and tight layout
fig = px.sunburst(
duplicate_rows,
path=duplicate_rows.columns.tolist(),
title='Sunburst Chart of Duplicate Rows',
width=3500,
height=3000
)
# Update layout for a tight fit
fig.update_layout(
margin=dict(l=20, r=20, t=40, b=20), # Adjust margins
title_x=0.5, # Center the title
uniformtext=dict(minsize=10, mode='hide') # Adjust text to avoid overlap
)
# Save the figure as an HTML file
fig.write_html("sunburst_chart.html")
# Download the saved HTML file
from google.colab import files
files.download('sunburst_chart.html')
print("Sunburst chart saved as 'sunburst_chart.html'. Please open that file to review")
else:
print("No duplicate rows found.")
Sunburst chart saved as 'sunburst_chart.html'. Please open that file to review
# Check for duplicate rows.
pd.set_option('display.max_colwidth', None)
duplicate_rows = merged_squad_df[merged_squad_df.duplicated()]
duplicate_rows
| context | question | answer | ans_start | ans_end | |
|---|---|---|---|---|---|
| 529 | on january 7, 2012, beyoncé gave birth to her first child, a daughter, blue ivy carter, at lenox hill hospital in new york. five months later, she performed for four nights at revel atlantic city's ovation hall to celebrate the resort's opening, her first performances since giving birth to blue ivy. | when did beyonce have her first child? | january 7, 2012 | 3 | 0 |
| 1650 | in 1827, soon after the death of chopin's youngest sister emilia, the family moved from the warsaw university building, adjacent to the kazimierz palace, to lodgings just across the street from the university, in the south annex of the krasiński palace on krakowskie przedmieście,[n 5] where chopin lived until he left warsaw in 1830.[n 6] here his parents continued running their boarding house for male students; the chopin family parlour (salonik chopinów) became a museum in the 20th century. in 1829 the artist ambroży mieroszewski executed a set of portraits of chopin family members, including the first known portrait of the composer.[n 7] | what year did chopin leave warsaw? | 1830 | 329 | 0 |
| 1794 | although the two displayed great respect and admiration for each other, their friendship was uneasy and had some qualities of a love-hate relationship. harold c. schonberg believes that chopin displayed a "tinge of jealousy and spite" towards liszt's virtuosity on the piano, and others have also argued that he had become enchanted with liszt's theatricality, showmanship and success. liszt was the dedicatee of chopin's op. 10 études, and his performance of them prompted the composer to write to hiller, "i should like to rob him of the way he plays my studies." however, chopin expressed annoyance in 1843 when liszt performed one of his nocturnes with the addition of numerous intricate embellishments, at which chopin remarked that he should play the music as written or not play it at all, forcing an apology. most biographers of chopin state that after this the two had little to do with each other, although in his letters dated as late as 1848 he still referred to him as "my friend liszt". some commentators point to events in the two men's romantic lives which led to a rift between them; there are claims that liszt had displayed jealousy of his mistress marie d'agoult's obsession with chopin, while others believe that chopin had become concerned about liszt's growing relationship with george sand. | what was the name of liszt's mistress? | marie d'agoult | 1168 | 0 |
| 2019 | the funeral, held at the church of the madeleine in paris, was delayed almost two weeks, until 30 october. entrance was restricted to ticket holders as many people were expected to attend. over 3,000 people arrived without invitations, from as far as london, berlin and vienna, and were excluded. | how long was chopin's funeral delayed? | two weeks | 78 | 0 |
| 2037 | chopin's tombstone, featuring the muse of music, euterpe, weeping over a broken lyre, was designed and sculpted by clésinger. the expenses of the funeral and monument, amounting to 5,000 francs, were covered by jane stirling, who also paid for the return of the composer's sister ludwika to warsaw. ludwika took chopin's heart in an urn, preserved in alcohol, back to poland in 1850.[n 9] she also took a collection of two hundred letters from sand to chopin; after 1851 these were returned to sand, who seems to have destroyed them. | who paid for chopin's funeral? | jane stirling | 211 | 0 |
| ... | ... | ... | ... | ... | ... |
| 9677 | sayyid abul ala maududi was an important early twentieth-century figure in the islamic revival in india, and then after independence from britain, in pakistan. trained as a lawyer he chose the profession of journalism, and wrote about contemporary issues and most importantly about islam and islamic law. maududi founded the jamaat-e-islami party in 1941 and remained its leader until 1972. however, maududi had much more impact through his writing than through his political organising. his extremely influential books (translated into many languages) placed islam in a modern context, and influenced not only conservative ulema but liberal modernizer islamists such as al-faruqi, whose "islamization of knowledge" carried forward some of maududi's key principles. | where did maududi exert the most impact? | through his writing | 429 | 0 |
| 10511 | it is a common misconception to ascribe the stiffness and rigidity of solid matter to the repulsion of like charges under the influence of the electromagnetic force. however, these characteristics actually result from the pauli exclusion principle.[citation needed] since electrons are fermions, they cannot occupy the same quantum mechanical state as other electrons. when the electrons in a material are densely packed together, there are not enough lower energy quantum mechanical states for them all, so some of them must be in higher energy states. this means that it takes energy to pack them together. while this effect is manifested macroscopically as a structural force, it is technically only the result of the existence of a finite set of electron states. | what is often misunderstood as the cause of matter rigidity? | repulsion of like charges | 90 | 0 |
| 10512 | it is a common misconception to ascribe the stiffness and rigidity of solid matter to the repulsion of like charges under the influence of the electromagnetic force. however, these characteristics actually result from the pauli exclusion principle.[citation needed] since electrons are fermions, they cannot occupy the same quantum mechanical state as other electrons. when the electrons in a material are densely packed together, there are not enough lower energy quantum mechanical states for them all, so some of them must be in higher energy states. this means that it takes energy to pack them together. while this effect is manifested macroscopically as a structural force, it is technically only the result of the existence of a finite set of electron states. | what actually causes rigidity in matter? | the pauli exclusion principle | 218 | 0 |
| 10513 | it is a common misconception to ascribe the stiffness and rigidity of solid matter to the repulsion of like charges under the influence of the electromagnetic force. however, these characteristics actually result from the pauli exclusion principle.[citation needed] since electrons are fermions, they cannot occupy the same quantum mechanical state as other electrons. when the electrons in a material are densely packed together, there are not enough lower energy quantum mechanical states for them all, so some of them must be in higher energy states. this means that it takes energy to pack them together. while this effect is manifested macroscopically as a structural force, it is technically only the result of the existence of a finite set of electron states. | what is needed to pack electrons densely together? | energy | 579 | 0 |
| 10514 | it is a common misconception to ascribe the stiffness and rigidity of solid matter to the repulsion of like charges under the influence of the electromagnetic force. however, these characteristics actually result from the pauli exclusion principle.[citation needed] since electrons are fermions, they cannot occupy the same quantum mechanical state as other electrons. when the electrons in a material are densely packed together, there are not enough lower energy quantum mechanical states for them all, so some of them must be in higher energy states. this means that it takes energy to pack them together. while this effect is manifested macroscopically as a structural force, it is technically only the result of the existence of a finite set of electron states. | how is the pauli exclusion priciple manifested in the macro world? | as a structural force | 657 | 0 |
134 rows × 5 columns
Here, I noticed that there are no missing values at all in the dataset. Hence, the plot looks empty.
# Visualize the total number of missing values per column
missing_counts = merged_squad_df.isnull().sum()
plt.figure(figsize=(10, 6))
missing_counts.plot(kind='bar', color='skyblue')
plt.title('Number of Missing Values per Column')
plt.xlabel('Columns')
plt.ylabel('Missing Values Count')
plt.show()
# Check for missing values in each column
merged_squad_df.isnull().sum()
| 0 | |
|---|---|
| context | 0 |
| question | 0 |
| answer | 0 |
| ans_start | 0 |
| ans_end | 0 |
In general, let us say there is a CSV file containing the dataset and it has multiple columns. But only one column of it has text content. If we need to perform any task related to the text content, we can easily do it.
But sometimes, there are datasets where there are multiple columns having text content in all of them.
During this scenario, it is our utmost responsibility to select the correct column (containing text data) of the dataset. Let me simply explain how to select the column to preprocess and do further analysis.
Relevance to the Task: Choose the column most pertinent to your analysis or machine/deep learning task.
Content Quality: Assess the quality of the text. Look for columns that have fewer missing values, less noise, or more coherent language.
Length and Structure: Consider the average length of the text and whether the format is consistent. Short, structured text (like titles) might be more appropriate for certain analyses compared to long, free-form text.
Diversity: If the task requires varied inputs, choose columns that provide a range of perspectives or types of information.
Data Volume: Analyze the size of the text in each column. Columns with more substantial data might provide better insights for modeling.
Feature Extraction Potential: Evaluate the potential for extracting useful features (like keywords or entities) from the text.
Apply the above guidelines to select the text data column in case of multiple text data columns in the dataframe.
Now, let us make use of the above guidelines and decide the column containing the text data to preprocess further from the merged SQuAD Dataframe.
Compare these columns with the above guidelines.
Relevance to the Task: Our task is to perform preprocessing. all the context, question and answer columns have text data.
Content Quality: context, question and answer are inter-related to each other. All these columns do not have any null or missing values.
Length and Structure: The average length of the text and consistent format is available in context, question and answer columns. Though the answer column has very less data, but it still has some noise. Hence, we need to clean that.
Diversity: Various ranges and types of information is available in the context, question and answer columns.
Data Volume: context, question and answer columns have the substantial data that provides diverse range of information.
Feature Extraction Potential: The potential for extracting useful features (like keywords or entities) from the text is mostly available in context, question and answer columns.
To perform text preprocessing, we need only the claim column. Hence, as context, question and answer columns are interconnected, I have taken them into a new dataframe. Then I passed them to the text preprocessing.
Look at the following code and results.
# Create a new dataframe with just "context", "question" and "answer" columns
data = merged_squad_df[["context", "question", "answer"]]
pd.set_option('display.max_colwidth', None)
data.head(10)
| context | question | answer | |
|---|---|---|---|
| 0 | architecturally, the school has a catholic character. atop the main building's gold dome is a golden statue of the virgin mary. immediately in front of the main building and facing it, is a copper statue of christ with arms upraised with the legend "venite ad me omnes". next to the main building is the basilica of the sacred heart. immediately behind the basilica is the grotto, a marian place of prayer and reflection. it is a replica of the grotto at lourdes, france where the virgin mary reputedly appeared to saint bernadette soubirous in 1858. at the end of the main drive (and in a direct line that connects through 3 statues and the gold dome), is a simple, modern stone statue of mary. | to whom did the virgin mary allegedly appear in 1858 in lourdes france? | saint bernadette soubirous |
| 1 | architecturally, the school has a catholic character. atop the main building's gold dome is a golden statue of the virgin mary. immediately in front of the main building and facing it, is a copper statue of christ with arms upraised with the legend "venite ad me omnes". next to the main building is the basilica of the sacred heart. immediately behind the basilica is the grotto, a marian place of prayer and reflection. it is a replica of the grotto at lourdes, france where the virgin mary reputedly appeared to saint bernadette soubirous in 1858. at the end of the main drive (and in a direct line that connects through 3 statues and the gold dome), is a simple, modern stone statue of mary. | what is in front of the notre dame main building? | a copper statue of christ |
| 2 | architecturally, the school has a catholic character. atop the main building's gold dome is a golden statue of the virgin mary. immediately in front of the main building and facing it, is a copper statue of christ with arms upraised with the legend "venite ad me omnes". next to the main building is the basilica of the sacred heart. immediately behind the basilica is the grotto, a marian place of prayer and reflection. it is a replica of the grotto at lourdes, france where the virgin mary reputedly appeared to saint bernadette soubirous in 1858. at the end of the main drive (and in a direct line that connects through 3 statues and the gold dome), is a simple, modern stone statue of mary. | the basilica of the sacred heart at notre dame is beside to which structure? | the main building |
| 3 | architecturally, the school has a catholic character. atop the main building's gold dome is a golden statue of the virgin mary. immediately in front of the main building and facing it, is a copper statue of christ with arms upraised with the legend "venite ad me omnes". next to the main building is the basilica of the sacred heart. immediately behind the basilica is the grotto, a marian place of prayer and reflection. it is a replica of the grotto at lourdes, france where the virgin mary reputedly appeared to saint bernadette soubirous in 1858. at the end of the main drive (and in a direct line that connects through 3 statues and the gold dome), is a simple, modern stone statue of mary. | what is the grotto at notre dame? | a marian place of prayer and reflection |
| 4 | architecturally, the school has a catholic character. atop the main building's gold dome is a golden statue of the virgin mary. immediately in front of the main building and facing it, is a copper statue of christ with arms upraised with the legend "venite ad me omnes". next to the main building is the basilica of the sacred heart. immediately behind the basilica is the grotto, a marian place of prayer and reflection. it is a replica of the grotto at lourdes, france where the virgin mary reputedly appeared to saint bernadette soubirous in 1858. at the end of the main drive (and in a direct line that connects through 3 statues and the gold dome), is a simple, modern stone statue of mary. | what sits on top of the main building at notre dame? | a golden statue of the virgin mary |
| 5 | as at most other universities, notre dame's students run a number of news media outlets. the nine student-run outlets include three newspapers, both a radio and television station, and several magazines and journals. begun as a one-page journal in september 1876, the scholastic magazine is issued twice monthly and claims to be the oldest continuous collegiate publication in the united states. the other magazine, the juggler, is released twice a year and focuses on student literature and artwork. the dome yearbook is published annually. the newspapers have varying publication interests, with the observer published daily and mainly reporting university and other news, and staffed by students from both notre dame and saint mary's college. unlike scholastic and the dome, the observer is an independent publication and does not have a faculty advisor or any editorial oversight from the university. in 1987, when some students believed that the observer began to show a conservative bias, a liberal newspaper, common sense was published. likewise, in 2003, when other students believed that the paper showed a liberal bias, the conservative paper irish rover went into production. neither paper is published as often as the observer; however, all three are distributed to all students. finally, in spring 2008 an undergraduate journal for political science research, beyond politics, made its debut. | when did the scholastic magazine of notre dame begin publishing? | september 1876 |
| 6 | as at most other universities, notre dame's students run a number of news media outlets. the nine student-run outlets include three newspapers, both a radio and television station, and several magazines and journals. begun as a one-page journal in september 1876, the scholastic magazine is issued twice monthly and claims to be the oldest continuous collegiate publication in the united states. the other magazine, the juggler, is released twice a year and focuses on student literature and artwork. the dome yearbook is published annually. the newspapers have varying publication interests, with the observer published daily and mainly reporting university and other news, and staffed by students from both notre dame and saint mary's college. unlike scholastic and the dome, the observer is an independent publication and does not have a faculty advisor or any editorial oversight from the university. in 1987, when some students believed that the observer began to show a conservative bias, a liberal newspaper, common sense was published. likewise, in 2003, when other students believed that the paper showed a liberal bias, the conservative paper irish rover went into production. neither paper is published as often as the observer; however, all three are distributed to all students. finally, in spring 2008 an undergraduate journal for political science research, beyond politics, made its debut. | how often is notre dame's the juggler published? | twice |
| 7 | as at most other universities, notre dame's students run a number of news media outlets. the nine student-run outlets include three newspapers, both a radio and television station, and several magazines and journals. begun as a one-page journal in september 1876, the scholastic magazine is issued twice monthly and claims to be the oldest continuous collegiate publication in the united states. the other magazine, the juggler, is released twice a year and focuses on student literature and artwork. the dome yearbook is published annually. the newspapers have varying publication interests, with the observer published daily and mainly reporting university and other news, and staffed by students from both notre dame and saint mary's college. unlike scholastic and the dome, the observer is an independent publication and does not have a faculty advisor or any editorial oversight from the university. in 1987, when some students believed that the observer began to show a conservative bias, a liberal newspaper, common sense was published. likewise, in 2003, when other students believed that the paper showed a liberal bias, the conservative paper irish rover went into production. neither paper is published as often as the observer; however, all three are distributed to all students. finally, in spring 2008 an undergraduate journal for political science research, beyond politics, made its debut. | what is the daily student paper at notre dame called? | the observer |
| 8 | as at most other universities, notre dame's students run a number of news media outlets. the nine student-run outlets include three newspapers, both a radio and television station, and several magazines and journals. begun as a one-page journal in september 1876, the scholastic magazine is issued twice monthly and claims to be the oldest continuous collegiate publication in the united states. the other magazine, the juggler, is released twice a year and focuses on student literature and artwork. the dome yearbook is published annually. the newspapers have varying publication interests, with the observer published daily and mainly reporting university and other news, and staffed by students from both notre dame and saint mary's college. unlike scholastic and the dome, the observer is an independent publication and does not have a faculty advisor or any editorial oversight from the university. in 1987, when some students believed that the observer began to show a conservative bias, a liberal newspaper, common sense was published. likewise, in 2003, when other students believed that the paper showed a liberal bias, the conservative paper irish rover went into production. neither paper is published as often as the observer; however, all three are distributed to all students. finally, in spring 2008 an undergraduate journal for political science research, beyond politics, made its debut. | how many student news papers are found at notre dame? | three |
| 9 | as at most other universities, notre dame's students run a number of news media outlets. the nine student-run outlets include three newspapers, both a radio and television station, and several magazines and journals. begun as a one-page journal in september 1876, the scholastic magazine is issued twice monthly and claims to be the oldest continuous collegiate publication in the united states. the other magazine, the juggler, is released twice a year and focuses on student literature and artwork. the dome yearbook is published annually. the newspapers have varying publication interests, with the observer published daily and mainly reporting university and other news, and staffed by students from both notre dame and saint mary's college. unlike scholastic and the dome, the observer is an independent publication and does not have a faculty advisor or any editorial oversight from the university. in 1987, when some students believed that the observer began to show a conservative bias, a liberal newspaper, common sense was published. likewise, in 2003, when other students believed that the paper showed a liberal bias, the conservative paper irish rover went into production. neither paper is published as often as the observer; however, all three are distributed to all students. finally, in spring 2008 an undergraduate journal for political science research, beyond politics, made its debut. | in what year did the student paper common sense begin publication at notre dame? | 1987 |
To preprocess any text data, we have many steps to follow. As part of this assignment, I followed the below preprocessing steps:
Removal of HTML tags: Strips HTML tags, if any (e.g., <div>, <p>) from text, leaving only the content. This is essential for cleaning web-scraped data or text extracted from HTML documents. It helps ensure that only the meaningful text is processed. It also removes unwanted scrap from the data to make it more readable.
Removal of URLs: Eliminates web addresses (e.g., http://example.com) from the text. URLs are often irrelevant to text analysis and they might add noise. Removing them helps in focusing on the actual content of the text.
Removal of Email IDs: Deletes email addresses (e.g., user@example.com) from the text. Email IDs are typically not useful for analysis and can be considered noise, so removing them cleans the text for more accurate processing. Having more noise in the data yields poor results. Hence, it is better to torture the data as much as possible until we obtain positive results.
Convert to Lowercase: This was already taken care before itself.
Replace contractions with simple strings: This involves expanding shortened forms of words into their full, standard expressions. Contractions are often used in casual or spoken English to create a more informal tone, but in more formal writing or for certain types of text analysis, it's sometimes necessary to convert these contractions back to their complete forms.
Examples of contractions:
"can't" -> "cannot"
"don't" -> "do not"
"it's" -> "it is"
"you're" -> "you are"
"she's" -> "she is"
"he's" -> "he is"
Removal of Punctuations: Removes punctuation marks (e.g., .,!?) from the text. Punctuation can often be extraneous for text processing tasks, so removing it helps in simplifying the text and focusing on the words themselves.
Perform Tokenization: Tokenization is a fundamental process of NLP where the text data is split into smaller and manageable units called Tokens. The token could be anything such as words, phrases, symbols etc.
Lemmatization and Stopwords Removal:
*Lemmatization* reduces words to their base or dictionary form (lemma) using vocabulary and morphological analysis. It is more precise and results in actual words. *Stopwords Removal* is the process of filtering out the common words that are often considered irrelevant for analysis. These words are known as "stop words," and they typically include articles, prepositions, conjunctions, and other common terms that do not carry significant meaning in the context of analysis. Examples include words like "the," "and," "is," "in," and "on.". Small note for Lemmatization and Stopwords Removal:
doc = nlp(text) line in the below code as part of this step is nothing but the loaded SpaCy NLP model. When we pass the clean text to this, it will by default performs tokenization.
Lemmatization and Stopwords removal happen only after tokenization.
Here I have loaded the common SpaCy English Large sized model and performed almost all the preprocessing steps using SpaCy.
# Download and load SpaCy English Large model
nlp = spacy.load("en_core_web_lg")
def preprocess_text(text):
# Remove HTML tags
text = BeautifulSoup(text, 'html.parser').get_text()
# Remove URLs
text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
# Remove Email IDs
# Define the regex pattern for matching email addresses
email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
# Substitute email addresses with an empty string
text = re.sub(email_pattern, '', text)
# Replace contractions with simple strings
# Initial dictionary of common contractions
contractions_dict = {
"ain't": "am not",
"aren't": "are not",
"can't": "cannot",
"couldn't": "could not",
"didn't": "did not",
"doesn't": "does not",
"don't": "do not",
"hadn't": "had not",
"hasn't": "has not",
"haven't": "have not",
"he's": "he is",
"how's": "how is",
"I'm": "I am",
"I've": "I have",
"isn't": "is not",
"it'll": "it will",
"it's": "it is",
"let's": "let us",
"mightn't": "might not",
"mustn't": "must not",
"shan't": "shall not",
"she's": "she is",
"shouldn't": "should not",
"wasn't": "was not",
"we're": "we are",
"weren't": "were not",
"what's": "what is",
"where's": "where is",
"who's": "who is",
"won't": "will not",
"wouldn't": "would not",
"gonna": "going to",
"wanna": "want to"
}
pattern = re.compile(r'\b(' + '|'.join(re.escape(key) for key in contractions_dict.keys()) + r')\b')
text = pattern.sub(lambda x: contractions_dict[x.group()], text)
# Remove punctuation
text = re.sub(r'[^\w\s]', '', text)
# Lemmatize and remove stopwords
doc = nlp(text)
return ' '.join([token.lemma_ for token in doc if not token.is_stop])
# List of columns to apply the function to
columns_to_preprocess = ['context', 'question', 'answer']
# Apply the preprocessing function to each specified column
data[columns_to_preprocess] = data[columns_to_preprocess].applymap(preprocess_text)
# Display the first 10 rows of the updated DataFrame
data.head(10)
| context | question | answer | |
|---|---|---|---|
| 0 | architecturally school catholic character atop main building gold dome golden statue virgin mary immediately main building face copper statue christ arm upraise legend venite ad omnes main building basilica sacred heart immediately basilica grotto marian place prayer reflection replica grotto lourdes france virgin mary reputedly appear saint bernadette soubirous 1858 end main drive direct line connect 3 statue gold dome simple modern stone statue mary | virgin mary allegedly appear 1858 lourdes france | saint bernadette soubirous |
| 1 | architecturally school catholic character atop main building gold dome golden statue virgin mary immediately main building face copper statue christ arm upraise legend venite ad omnes main building basilica sacred heart immediately basilica grotto marian place prayer reflection replica grotto lourdes france virgin mary reputedly appear saint bernadette soubirous 1858 end main drive direct line connect 3 statue gold dome simple modern stone statue mary | notre dame main building | copper statue christ |
| 2 | architecturally school catholic character atop main building gold dome golden statue virgin mary immediately main building face copper statue christ arm upraise legend venite ad omnes main building basilica sacred heart immediately basilica grotto marian place prayer reflection replica grotto lourdes france virgin mary reputedly appear saint bernadette soubirous 1858 end main drive direct line connect 3 statue gold dome simple modern stone statue mary | basilica sacred heart notre dame structure | main building |
| 3 | architecturally school catholic character atop main building gold dome golden statue virgin mary immediately main building face copper statue christ arm upraise legend venite ad omnes main building basilica sacred heart immediately basilica grotto marian place prayer reflection replica grotto lourdes france virgin mary reputedly appear saint bernadette soubirous 1858 end main drive direct line connect 3 statue gold dome simple modern stone statue mary | grotto notre dame | marian place prayer reflection |
| 4 | architecturally school catholic character atop main building gold dome golden statue virgin mary immediately main building face copper statue christ arm upraise legend venite ad omnes main building basilica sacred heart immediately basilica grotto marian place prayer reflection replica grotto lourdes france virgin mary reputedly appear saint bernadette soubirous 1858 end main drive direct line connect 3 statue gold dome simple modern stone statue mary | sit main building notre dame | golden statue virgin mary |
| 5 | university notre dames student run number news medium outlet studentrun outlet include newspaper radio television station magazine journal begin onepage journal september 1876 scholastic magazine issue twice monthly claim old continuous collegiate publication united states magazine juggler release twice year focus student literature artwork dome yearbook publish annually newspaper vary publication interest observer publish daily mainly report university news staff student notre dame saint marys college unlike scholastic dome observer independent publication faculty advisor editorial oversight university 1987 student believe observer begin conservative bias liberal newspaper common sense publish likewise 2003 student believe paper show liberal bias conservative paper irish rover go production paper publish observer distribute student finally spring 2008 undergraduate journal political science research politic debut | scholastic magazine notre dame begin publish | september 1876 |
| 6 | university notre dames student run number news medium outlet studentrun outlet include newspaper radio television station magazine journal begin onepage journal september 1876 scholastic magazine issue twice monthly claim old continuous collegiate publication united states magazine juggler release twice year focus student literature artwork dome yearbook publish annually newspaper vary publication interest observer publish daily mainly report university news staff student notre dame saint marys college unlike scholastic dome observer independent publication faculty advisor editorial oversight university 1987 student believe observer begin conservative bias liberal newspaper common sense publish likewise 2003 student believe paper show liberal bias conservative paper irish rover go production paper publish observer distribute student finally spring 2008 undergraduate journal political science research politic debut | notre dames juggler publish | twice |
| 7 | university notre dames student run number news medium outlet studentrun outlet include newspaper radio television station magazine journal begin onepage journal september 1876 scholastic magazine issue twice monthly claim old continuous collegiate publication united states magazine juggler release twice year focus student literature artwork dome yearbook publish annually newspaper vary publication interest observer publish daily mainly report university news staff student notre dame saint marys college unlike scholastic dome observer independent publication faculty advisor editorial oversight university 1987 student believe observer begin conservative bias liberal newspaper common sense publish likewise 2003 student believe paper show liberal bias conservative paper irish rover go production paper publish observer distribute student finally spring 2008 undergraduate journal political science research politic debut | daily student paper notre dame call | observer |
| 8 | university notre dames student run number news medium outlet studentrun outlet include newspaper radio television station magazine journal begin onepage journal september 1876 scholastic magazine issue twice monthly claim old continuous collegiate publication united states magazine juggler release twice year focus student literature artwork dome yearbook publish annually newspaper vary publication interest observer publish daily mainly report university news staff student notre dame saint marys college unlike scholastic dome observer independent publication faculty advisor editorial oversight university 1987 student believe observer begin conservative bias liberal newspaper common sense publish likewise 2003 student believe paper show liberal bias conservative paper irish rover go production paper publish observer distribute student finally spring 2008 undergraduate journal political science research politic debut | student news paper find notre dame | |
| 9 | university notre dames student run number news medium outlet studentrun outlet include newspaper radio television station magazine journal begin onepage journal september 1876 scholastic magazine issue twice monthly claim old continuous collegiate publication united states magazine juggler release twice year focus student literature artwork dome yearbook publish annually newspaper vary publication interest observer publish daily mainly report university news staff student notre dame saint marys college unlike scholastic dome observer independent publication faculty advisor editorial oversight university 1987 student believe observer begin conservative bias liberal newspaper common sense publish likewise 2003 student believe paper show liberal bias conservative paper irish rover go production paper publish observer distribute student finally spring 2008 undergraduate journal political science research politic debut | year student paper common sense begin publication notre dame | 1987 |
Almost all the text preprocessing is already done above. But tokenization was not done.
We have to perform tokenization at the starting point itself but we have already used doc = nlp(text) in the above code cell.
Hence, for lemmatization and stop words removal, the NLP loaded model itself has taken care of tokenization.
Now, we have to explicitly perform tokenization to all the context, question and answer columns post preprocessing.
*One of my learnings is:* If we perform tokenization post text-preprocessing, the text looks clean after tokenization. Else, there might be a chance of noise inside the text, how much ever we take care.
tokenized_context, tokenized_question and tokenized_answer instead of distrubing the existing preprocessed claim.# List of columns to tokenize
columns_to_tokenize = ['context', 'question', 'answer']
# Perform tokenization and create new columns for the tokenized results
for column in columns_to_tokenize:
data[f'tokenized_{column}'] = data.apply(lambda row: nltk.word_tokenize(row[column]), axis=1)
# Display the first 10 rows of the updated DataFrame
data.head(10)
| context | question | answer | tokenized_context | tokenized_question | tokenized_answer | |
|---|---|---|---|---|---|---|
| 0 | architecturally school catholic character atop main building gold dome golden statue virgin mary immediately main building face copper statue christ arm upraise legend venite ad omnes main building basilica sacred heart immediately basilica grotto marian place prayer reflection replica grotto lourdes france virgin mary reputedly appear saint bernadette soubirous 1858 end main drive direct line connect 3 statue gold dome simple modern stone statue mary | virgin mary allegedly appear 1858 lourdes france | saint bernadette soubirous | [architecturally, school, catholic, character, atop, main, building, gold, dome, golden, statue, virgin, mary, immediately, main, building, face, copper, statue, christ, arm, upraise, legend, venite, ad, omnes, main, building, basilica, sacred, heart, immediately, basilica, grotto, marian, place, prayer, reflection, replica, grotto, lourdes, france, virgin, mary, reputedly, appear, saint, bernadette, soubirous, 1858, end, main, drive, direct, line, connect, 3, statue, gold, dome, simple, modern, stone, statue, mary] | [virgin, mary, allegedly, appear, 1858, lourdes, france] | [saint, bernadette, soubirous] |
| 1 | architecturally school catholic character atop main building gold dome golden statue virgin mary immediately main building face copper statue christ arm upraise legend venite ad omnes main building basilica sacred heart immediately basilica grotto marian place prayer reflection replica grotto lourdes france virgin mary reputedly appear saint bernadette soubirous 1858 end main drive direct line connect 3 statue gold dome simple modern stone statue mary | notre dame main building | copper statue christ | [architecturally, school, catholic, character, atop, main, building, gold, dome, golden, statue, virgin, mary, immediately, main, building, face, copper, statue, christ, arm, upraise, legend, venite, ad, omnes, main, building, basilica, sacred, heart, immediately, basilica, grotto, marian, place, prayer, reflection, replica, grotto, lourdes, france, virgin, mary, reputedly, appear, saint, bernadette, soubirous, 1858, end, main, drive, direct, line, connect, 3, statue, gold, dome, simple, modern, stone, statue, mary] | [notre, dame, main, building] | [copper, statue, christ] |
| 2 | architecturally school catholic character atop main building gold dome golden statue virgin mary immediately main building face copper statue christ arm upraise legend venite ad omnes main building basilica sacred heart immediately basilica grotto marian place prayer reflection replica grotto lourdes france virgin mary reputedly appear saint bernadette soubirous 1858 end main drive direct line connect 3 statue gold dome simple modern stone statue mary | basilica sacred heart notre dame structure | main building | [architecturally, school, catholic, character, atop, main, building, gold, dome, golden, statue, virgin, mary, immediately, main, building, face, copper, statue, christ, arm, upraise, legend, venite, ad, omnes, main, building, basilica, sacred, heart, immediately, basilica, grotto, marian, place, prayer, reflection, replica, grotto, lourdes, france, virgin, mary, reputedly, appear, saint, bernadette, soubirous, 1858, end, main, drive, direct, line, connect, 3, statue, gold, dome, simple, modern, stone, statue, mary] | [basilica, sacred, heart, notre, dame, structure] | [main, building] |
| 3 | architecturally school catholic character atop main building gold dome golden statue virgin mary immediately main building face copper statue christ arm upraise legend venite ad omnes main building basilica sacred heart immediately basilica grotto marian place prayer reflection replica grotto lourdes france virgin mary reputedly appear saint bernadette soubirous 1858 end main drive direct line connect 3 statue gold dome simple modern stone statue mary | grotto notre dame | marian place prayer reflection | [architecturally, school, catholic, character, atop, main, building, gold, dome, golden, statue, virgin, mary, immediately, main, building, face, copper, statue, christ, arm, upraise, legend, venite, ad, omnes, main, building, basilica, sacred, heart, immediately, basilica, grotto, marian, place, prayer, reflection, replica, grotto, lourdes, france, virgin, mary, reputedly, appear, saint, bernadette, soubirous, 1858, end, main, drive, direct, line, connect, 3, statue, gold, dome, simple, modern, stone, statue, mary] | [grotto, notre, dame] | [marian, place, prayer, reflection] |
| 4 | architecturally school catholic character atop main building gold dome golden statue virgin mary immediately main building face copper statue christ arm upraise legend venite ad omnes main building basilica sacred heart immediately basilica grotto marian place prayer reflection replica grotto lourdes france virgin mary reputedly appear saint bernadette soubirous 1858 end main drive direct line connect 3 statue gold dome simple modern stone statue mary | sit main building notre dame | golden statue virgin mary | [architecturally, school, catholic, character, atop, main, building, gold, dome, golden, statue, virgin, mary, immediately, main, building, face, copper, statue, christ, arm, upraise, legend, venite, ad, omnes, main, building, basilica, sacred, heart, immediately, basilica, grotto, marian, place, prayer, reflection, replica, grotto, lourdes, france, virgin, mary, reputedly, appear, saint, bernadette, soubirous, 1858, end, main, drive, direct, line, connect, 3, statue, gold, dome, simple, modern, stone, statue, mary] | [sit, main, building, notre, dame] | [golden, statue, virgin, mary] |
| 5 | university notre dames student run number news medium outlet studentrun outlet include newspaper radio television station magazine journal begin onepage journal september 1876 scholastic magazine issue twice monthly claim old continuous collegiate publication united states magazine juggler release twice year focus student literature artwork dome yearbook publish annually newspaper vary publication interest observer publish daily mainly report university news staff student notre dame saint marys college unlike scholastic dome observer independent publication faculty advisor editorial oversight university 1987 student believe observer begin conservative bias liberal newspaper common sense publish likewise 2003 student believe paper show liberal bias conservative paper irish rover go production paper publish observer distribute student finally spring 2008 undergraduate journal political science research politic debut | scholastic magazine notre dame begin publish | september 1876 | [university, notre, dames, student, run, number, news, medium, outlet, studentrun, outlet, include, newspaper, radio, television, station, magazine, journal, begin, onepage, journal, september, 1876, scholastic, magazine, issue, twice, monthly, claim, old, continuous, collegiate, publication, united, states, magazine, juggler, release, twice, year, focus, student, literature, artwork, dome, yearbook, publish, annually, newspaper, vary, publication, interest, observer, publish, daily, mainly, report, university, news, staff, student, notre, dame, saint, marys, college, unlike, scholastic, dome, observer, independent, publication, faculty, advisor, editorial, oversight, university, 1987, student, believe, observer, begin, conservative, bias, liberal, newspaper, common, sense, publish, likewise, 2003, student, believe, paper, show, liberal, bias, conservative, paper, irish, ...] | [scholastic, magazine, notre, dame, begin, publish] | [september, 1876] |
| 6 | university notre dames student run number news medium outlet studentrun outlet include newspaper radio television station magazine journal begin onepage journal september 1876 scholastic magazine issue twice monthly claim old continuous collegiate publication united states magazine juggler release twice year focus student literature artwork dome yearbook publish annually newspaper vary publication interest observer publish daily mainly report university news staff student notre dame saint marys college unlike scholastic dome observer independent publication faculty advisor editorial oversight university 1987 student believe observer begin conservative bias liberal newspaper common sense publish likewise 2003 student believe paper show liberal bias conservative paper irish rover go production paper publish observer distribute student finally spring 2008 undergraduate journal political science research politic debut | notre dames juggler publish | twice | [university, notre, dames, student, run, number, news, medium, outlet, studentrun, outlet, include, newspaper, radio, television, station, magazine, journal, begin, onepage, journal, september, 1876, scholastic, magazine, issue, twice, monthly, claim, old, continuous, collegiate, publication, united, states, magazine, juggler, release, twice, year, focus, student, literature, artwork, dome, yearbook, publish, annually, newspaper, vary, publication, interest, observer, publish, daily, mainly, report, university, news, staff, student, notre, dame, saint, marys, college, unlike, scholastic, dome, observer, independent, publication, faculty, advisor, editorial, oversight, university, 1987, student, believe, observer, begin, conservative, bias, liberal, newspaper, common, sense, publish, likewise, 2003, student, believe, paper, show, liberal, bias, conservative, paper, irish, ...] | [notre, dames, juggler, publish] | [twice] |
| 7 | university notre dames student run number news medium outlet studentrun outlet include newspaper radio television station magazine journal begin onepage journal september 1876 scholastic magazine issue twice monthly claim old continuous collegiate publication united states magazine juggler release twice year focus student literature artwork dome yearbook publish annually newspaper vary publication interest observer publish daily mainly report university news staff student notre dame saint marys college unlike scholastic dome observer independent publication faculty advisor editorial oversight university 1987 student believe observer begin conservative bias liberal newspaper common sense publish likewise 2003 student believe paper show liberal bias conservative paper irish rover go production paper publish observer distribute student finally spring 2008 undergraduate journal political science research politic debut | daily student paper notre dame call | observer | [university, notre, dames, student, run, number, news, medium, outlet, studentrun, outlet, include, newspaper, radio, television, station, magazine, journal, begin, onepage, journal, september, 1876, scholastic, magazine, issue, twice, monthly, claim, old, continuous, collegiate, publication, united, states, magazine, juggler, release, twice, year, focus, student, literature, artwork, dome, yearbook, publish, annually, newspaper, vary, publication, interest, observer, publish, daily, mainly, report, university, news, staff, student, notre, dame, saint, marys, college, unlike, scholastic, dome, observer, independent, publication, faculty, advisor, editorial, oversight, university, 1987, student, believe, observer, begin, conservative, bias, liberal, newspaper, common, sense, publish, likewise, 2003, student, believe, paper, show, liberal, bias, conservative, paper, irish, ...] | [daily, student, paper, notre, dame, call] | [observer] |
| 8 | university notre dames student run number news medium outlet studentrun outlet include newspaper radio television station magazine journal begin onepage journal september 1876 scholastic magazine issue twice monthly claim old continuous collegiate publication united states magazine juggler release twice year focus student literature artwork dome yearbook publish annually newspaper vary publication interest observer publish daily mainly report university news staff student notre dame saint marys college unlike scholastic dome observer independent publication faculty advisor editorial oversight university 1987 student believe observer begin conservative bias liberal newspaper common sense publish likewise 2003 student believe paper show liberal bias conservative paper irish rover go production paper publish observer distribute student finally spring 2008 undergraduate journal political science research politic debut | student news paper find notre dame | [university, notre, dames, student, run, number, news, medium, outlet, studentrun, outlet, include, newspaper, radio, television, station, magazine, journal, begin, onepage, journal, september, 1876, scholastic, magazine, issue, twice, monthly, claim, old, continuous, collegiate, publication, united, states, magazine, juggler, release, twice, year, focus, student, literature, artwork, dome, yearbook, publish, annually, newspaper, vary, publication, interest, observer, publish, daily, mainly, report, university, news, staff, student, notre, dame, saint, marys, college, unlike, scholastic, dome, observer, independent, publication, faculty, advisor, editorial, oversight, university, 1987, student, believe, observer, begin, conservative, bias, liberal, newspaper, common, sense, publish, likewise, 2003, student, believe, paper, show, liberal, bias, conservative, paper, irish, ...] | [student, news, paper, find, notre, dame] | [] | |
| 9 | university notre dames student run number news medium outlet studentrun outlet include newspaper radio television station magazine journal begin onepage journal september 1876 scholastic magazine issue twice monthly claim old continuous collegiate publication united states magazine juggler release twice year focus student literature artwork dome yearbook publish annually newspaper vary publication interest observer publish daily mainly report university news staff student notre dame saint marys college unlike scholastic dome observer independent publication faculty advisor editorial oversight university 1987 student believe observer begin conservative bias liberal newspaper common sense publish likewise 2003 student believe paper show liberal bias conservative paper irish rover go production paper publish observer distribute student finally spring 2008 undergraduate journal political science research politic debut | year student paper common sense begin publication notre dame | 1987 | [university, notre, dames, student, run, number, news, medium, outlet, studentrun, outlet, include, newspaper, radio, television, station, magazine, journal, begin, onepage, journal, september, 1876, scholastic, magazine, issue, twice, monthly, claim, old, continuous, collegiate, publication, united, states, magazine, juggler, release, twice, year, focus, student, literature, artwork, dome, yearbook, publish, annually, newspaper, vary, publication, interest, observer, publish, daily, mainly, report, university, news, staff, student, notre, dame, saint, marys, college, unlike, scholastic, dome, observer, independent, publication, faculty, advisor, editorial, oversight, university, 1987, student, believe, observer, begin, conservative, bias, liberal, newspaper, common, sense, publish, likewise, 2003, student, believe, paper, show, liberal, bias, conservative, paper, irish, ...] | [year, student, paper, common, sense, begin, publication, notre, dame] | [1987] |
I have defined a get_pos_and_ner function that performs two tasks:
Here, the way you're processing NER is slightly different from how POS tags are generated
I mean:
Look at the code below for the same.
# Function to get POS tags and named entities
def get_pos_and_ner(sentence):
# Tokenize the sentence
words = word_tokenize(sentence)
# Apply POS tagging to the list of tokens, resulting in tuples of (word, POS tag).
pos_tags = pos_tag(words)
# Create a tree structure of named entities based on the POS tags.
named_entities_tree = ne_chunk(pos_tags)
# Convert the tree into a format that includes the word, POS tag, and a named entity tag
named_entities = tree2conlltags(named_entities_tree)
return pos_tags, named_entities
# Call the defined function to perform POS
data['context_POS'] = data['context'].apply(lambda x: get_pos_and_ner(x)[0])
data['question_POS'] = data['question'].apply(lambda x: get_pos_and_ner(x)[0])
data['answer_POS'] = data['answer'].apply(lambda x: get_pos_and_ner(x)[0])
data['context_NER'] = data['context'].apply(lambda x: get_pos_and_ner(x)[1])
data['question_NER'] = data['question'].apply(lambda x: get_pos_and_ner(x)[1])
data['answer_NER'] = data['answer'].apply(lambda x: get_pos_and_ner(x)[1])
# Display the results
data.head(30)
| context | question | answer | tokenized_context | tokenized_question | tokenized_answer | context_POS | question_POS | answer_POS | context_NER | question_NER | answer_NER | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | architecturally school catholic character atop main building gold dome golden statue virgin mary immediately main building face copper statue christ arm upraise legend venite ad omnes main building basilica sacred heart immediately basilica grotto marian place prayer reflection replica grotto lourdes france virgin mary reputedly appear saint bernadette soubirous 1858 end main drive direct line connect 3 statue gold dome simple modern stone statue mary | virgin mary allegedly appear 1858 lourdes france | saint bernadette soubirous | [architecturally, school, catholic, character, atop, main, building, gold, dome, golden, statue, virgin, mary, immediately, main, building, face, copper, statue, christ, arm, upraise, legend, venite, ad, omnes, main, building, basilica, sacred, heart, immediately, basilica, grotto, marian, place, prayer, reflection, replica, grotto, lourdes, france, virgin, mary, reputedly, appear, saint, bernadette, soubirous, 1858, end, main, drive, direct, line, connect, 3, statue, gold, dome, simple, modern, stone, statue, mary] | [virgin, mary, allegedly, appear, 1858, lourdes, france] | [saint, bernadette, soubirous] | [(architecturally, RB), (school, NN), (catholic, JJ), (character, NN), (atop, NN), (main, JJ), (building, NN), (gold, NN), (dome, NN), (golden, JJ), (statue, NN), (virgin, NN), (mary, JJ), (immediately, RB), (main, JJ), (building, NN), (face, NN), (copper, NN), (statue, NN), (christ, NN), (arm, NN), (upraise, NN), (legend, VBP), (venite, NN), (ad, NN), (omnes, NNS), (main, JJ), (building, NN), (basilica, NN), (sacred, VBD), (heart, NN), (immediately, RB), (basilica, VBD), (grotto, JJ), (marian, JJ), (place, NN), (prayer, NN), (reflection, NN), (replica, NN), (grotto, NN), (lourdes, VBZ), (france, NN), (virgin, NN), (mary, JJ), (reputedly, RB), (appear, VBP), (saint, JJ), (bernadette, NN), (soubirous, JJ), (1858, CD), (end, NN), (main, JJ), (drive, NN), (direct, JJ), (line, NN), (connect, NN), (3, CD), (statue, NN), (gold, NN), (dome, NN), (simple, JJ), (modern, JJ), (stone, NN), (statue, NN), (mary, NN)] | [(virgin, NN), (mary, JJ), (allegedly, RB), (appear, JJ), (1858, CD), (lourdes, NNS), (france, NN)] | [(saint, NN), (bernadette, NN), (soubirous, JJ)] | [(architecturally, RB, O), (school, NN, O), (catholic, JJ, O), (character, NN, O), (atop, NN, O), (main, JJ, O), (building, NN, O), (gold, NN, O), (dome, NN, O), (golden, JJ, O), (statue, NN, O), (virgin, NN, O), (mary, JJ, O), (immediately, RB, O), (main, JJ, O), (building, NN, O), (face, NN, O), (copper, NN, O), (statue, NN, O), (christ, NN, O), (arm, NN, O), (upraise, NN, O), (legend, VBP, O), (venite, NN, O), (ad, NN, O), (omnes, NNS, O), (main, JJ, O), (building, NN, O), (basilica, NN, O), (sacred, VBD, O), (heart, NN, O), (immediately, RB, O), (basilica, VBD, O), (grotto, JJ, O), (marian, JJ, O), (place, NN, O), (prayer, NN, O), (reflection, NN, O), (replica, NN, O), (grotto, NN, O), (lourdes, VBZ, O), (france, NN, O), (virgin, NN, O), (mary, JJ, O), (reputedly, RB, O), (appear, VBP, O), (saint, JJ, O), (bernadette, NN, O), (soubirous, JJ, O), (1858, CD, O), (end, NN, O), (main, JJ, O), (drive, NN, O), (direct, JJ, O), (line, NN, O), (connect, NN, O), (3, CD, O), (statue, NN, O), (gold, NN, O), (dome, NN, O), (simple, JJ, O), (modern, JJ, O), (stone, NN, O), (statue, NN, O), (mary, NN, O)] | [(virgin, NN, O), (mary, JJ, O), (allegedly, RB, O), (appear, JJ, O), (1858, CD, O), (lourdes, NNS, O), (france, NN, O)] | [(saint, NN, O), (bernadette, NN, O), (soubirous, JJ, O)] |
| 1 | architecturally school catholic character atop main building gold dome golden statue virgin mary immediately main building face copper statue christ arm upraise legend venite ad omnes main building basilica sacred heart immediately basilica grotto marian place prayer reflection replica grotto lourdes france virgin mary reputedly appear saint bernadette soubirous 1858 end main drive direct line connect 3 statue gold dome simple modern stone statue mary | notre dame main building | copper statue christ | [architecturally, school, catholic, character, atop, main, building, gold, dome, golden, statue, virgin, mary, immediately, main, building, face, copper, statue, christ, arm, upraise, legend, venite, ad, omnes, main, building, basilica, sacred, heart, immediately, basilica, grotto, marian, place, prayer, reflection, replica, grotto, lourdes, france, virgin, mary, reputedly, appear, saint, bernadette, soubirous, 1858, end, main, drive, direct, line, connect, 3, statue, gold, dome, simple, modern, stone, statue, mary] | [notre, dame, main, building] | [copper, statue, christ] | [(architecturally, RB), (school, NN), (catholic, JJ), (character, NN), (atop, NN), (main, JJ), (building, NN), (gold, NN), (dome, NN), (golden, JJ), (statue, NN), (virgin, NN), (mary, JJ), (immediately, RB), (main, JJ), (building, NN), (face, NN), (copper, NN), (statue, NN), (christ, NN), (arm, NN), (upraise, NN), (legend, VBP), (venite, NN), (ad, NN), (omnes, NNS), (main, JJ), (building, NN), (basilica, NN), (sacred, VBD), (heart, NN), (immediately, RB), (basilica, VBD), (grotto, JJ), (marian, JJ), (place, NN), (prayer, NN), (reflection, NN), (replica, NN), (grotto, NN), (lourdes, VBZ), (france, NN), (virgin, NN), (mary, JJ), (reputedly, RB), (appear, VBP), (saint, JJ), (bernadette, NN), (soubirous, JJ), (1858, CD), (end, NN), (main, JJ), (drive, NN), (direct, JJ), (line, NN), (connect, NN), (3, CD), (statue, NN), (gold, NN), (dome, NN), (simple, JJ), (modern, JJ), (stone, NN), (statue, NN), (mary, NN)] | [(notre, JJ), (dame, NN), (main, JJ), (building, NN)] | [(copper, NN), (statue, NN), (christ, NN)] | [(architecturally, RB, O), (school, NN, O), (catholic, JJ, O), (character, NN, O), (atop, NN, O), (main, JJ, O), (building, NN, O), (gold, NN, O), (dome, NN, O), (golden, JJ, O), (statue, NN, O), (virgin, NN, O), (mary, JJ, O), (immediately, RB, O), (main, JJ, O), (building, NN, O), (face, NN, O), (copper, NN, O), (statue, NN, O), (christ, NN, O), (arm, NN, O), (upraise, NN, O), (legend, VBP, O), (venite, NN, O), (ad, NN, O), (omnes, NNS, O), (main, JJ, O), (building, NN, O), (basilica, NN, O), (sacred, VBD, O), (heart, NN, O), (immediately, RB, O), (basilica, VBD, O), (grotto, JJ, O), (marian, JJ, O), (place, NN, O), (prayer, NN, O), (reflection, NN, O), (replica, NN, O), (grotto, NN, O), (lourdes, VBZ, O), (france, NN, O), (virgin, NN, O), (mary, JJ, O), (reputedly, RB, O), (appear, VBP, O), (saint, JJ, O), (bernadette, NN, O), (soubirous, JJ, O), (1858, CD, O), (end, NN, O), (main, JJ, O), (drive, NN, O), (direct, JJ, O), (line, NN, O), (connect, NN, O), (3, CD, O), (statue, NN, O), (gold, NN, O), (dome, NN, O), (simple, JJ, O), (modern, JJ, O), (stone, NN, O), (statue, NN, O), (mary, NN, O)] | [(notre, JJ, O), (dame, NN, O), (main, JJ, O), (building, NN, O)] | [(copper, NN, O), (statue, NN, O), (christ, NN, O)] |
| 2 | architecturally school catholic character atop main building gold dome golden statue virgin mary immediately main building face copper statue christ arm upraise legend venite ad omnes main building basilica sacred heart immediately basilica grotto marian place prayer reflection replica grotto lourdes france virgin mary reputedly appear saint bernadette soubirous 1858 end main drive direct line connect 3 statue gold dome simple modern stone statue mary | basilica sacred heart notre dame structure | main building | [architecturally, school, catholic, character, atop, main, building, gold, dome, golden, statue, virgin, mary, immediately, main, building, face, copper, statue, christ, arm, upraise, legend, venite, ad, omnes, main, building, basilica, sacred, heart, immediately, basilica, grotto, marian, place, prayer, reflection, replica, grotto, lourdes, france, virgin, mary, reputedly, appear, saint, bernadette, soubirous, 1858, end, main, drive, direct, line, connect, 3, statue, gold, dome, simple, modern, stone, statue, mary] | [basilica, sacred, heart, notre, dame, structure] | [main, building] | [(architecturally, RB), (school, NN), (catholic, JJ), (character, NN), (atop, NN), (main, JJ), (building, NN), (gold, NN), (dome, NN), (golden, JJ), (statue, NN), (virgin, NN), (mary, JJ), (immediately, RB), (main, JJ), (building, NN), (face, NN), (copper, NN), (statue, NN), (christ, NN), (arm, NN), (upraise, NN), (legend, VBP), (venite, NN), (ad, NN), (omnes, NNS), (main, JJ), (building, NN), (basilica, NN), (sacred, VBD), (heart, NN), (immediately, RB), (basilica, VBD), (grotto, JJ), (marian, JJ), (place, NN), (prayer, NN), (reflection, NN), (replica, NN), (grotto, NN), (lourdes, VBZ), (france, NN), (virgin, NN), (mary, JJ), (reputedly, RB), (appear, VBP), (saint, JJ), (bernadette, NN), (soubirous, JJ), (1858, CD), (end, NN), (main, JJ), (drive, NN), (direct, JJ), (line, NN), (connect, NN), (3, CD), (statue, NN), (gold, NN), (dome, NN), (simple, JJ), (modern, JJ), (stone, NN), (statue, NN), (mary, NN)] | [(basilica, NN), (sacred, VBD), (heart, NN), (notre, NN), (dame, NN), (structure, NN)] | [(main, JJ), (building, NN)] | [(architecturally, RB, O), (school, NN, O), (catholic, JJ, O), (character, NN, O), (atop, NN, O), (main, JJ, O), (building, NN, O), (gold, NN, O), (dome, NN, O), (golden, JJ, O), (statue, NN, O), (virgin, NN, O), (mary, JJ, O), (immediately, RB, O), (main, JJ, O), (building, NN, O), (face, NN, O), (copper, NN, O), (statue, NN, O), (christ, NN, O), (arm, NN, O), (upraise, NN, O), (legend, VBP, O), (venite, NN, O), (ad, NN, O), (omnes, NNS, O), (main, JJ, O), (building, NN, O), (basilica, NN, O), (sacred, VBD, O), (heart, NN, O), (immediately, RB, O), (basilica, VBD, O), (grotto, JJ, O), (marian, JJ, O), (place, NN, O), (prayer, NN, O), (reflection, NN, O), (replica, NN, O), (grotto, NN, O), (lourdes, VBZ, O), (france, NN, O), (virgin, NN, O), (mary, JJ, O), (reputedly, RB, O), (appear, VBP, O), (saint, JJ, O), (bernadette, NN, O), (soubirous, JJ, O), (1858, CD, O), (end, NN, O), (main, JJ, O), (drive, NN, O), (direct, JJ, O), (line, NN, O), (connect, NN, O), (3, CD, O), (statue, NN, O), (gold, NN, O), (dome, NN, O), (simple, JJ, O), (modern, JJ, O), (stone, NN, O), (statue, NN, O), (mary, NN, O)] | [(basilica, NN, O), (sacred, VBD, O), (heart, NN, O), (notre, NN, O), (dame, NN, O), (structure, NN, O)] | [(main, JJ, O), (building, NN, O)] |
| 3 | architecturally school catholic character atop main building gold dome golden statue virgin mary immediately main building face copper statue christ arm upraise legend venite ad omnes main building basilica sacred heart immediately basilica grotto marian place prayer reflection replica grotto lourdes france virgin mary reputedly appear saint bernadette soubirous 1858 end main drive direct line connect 3 statue gold dome simple modern stone statue mary | grotto notre dame | marian place prayer reflection | [architecturally, school, catholic, character, atop, main, building, gold, dome, golden, statue, virgin, mary, immediately, main, building, face, copper, statue, christ, arm, upraise, legend, venite, ad, omnes, main, building, basilica, sacred, heart, immediately, basilica, grotto, marian, place, prayer, reflection, replica, grotto, lourdes, france, virgin, mary, reputedly, appear, saint, bernadette, soubirous, 1858, end, main, drive, direct, line, connect, 3, statue, gold, dome, simple, modern, stone, statue, mary] | [grotto, notre, dame] | [marian, place, prayer, reflection] | [(architecturally, RB), (school, NN), (catholic, JJ), (character, NN), (atop, NN), (main, JJ), (building, NN), (gold, NN), (dome, NN), (golden, JJ), (statue, NN), (virgin, NN), (mary, JJ), (immediately, RB), (main, JJ), (building, NN), (face, NN), (copper, NN), (statue, NN), (christ, NN), (arm, NN), (upraise, NN), (legend, VBP), (venite, NN), (ad, NN), (omnes, NNS), (main, JJ), (building, NN), (basilica, NN), (sacred, VBD), (heart, NN), (immediately, RB), (basilica, VBD), (grotto, JJ), (marian, JJ), (place, NN), (prayer, NN), (reflection, NN), (replica, NN), (grotto, NN), (lourdes, VBZ), (france, NN), (virgin, NN), (mary, JJ), (reputedly, RB), (appear, VBP), (saint, JJ), (bernadette, NN), (soubirous, JJ), (1858, CD), (end, NN), (main, JJ), (drive, NN), (direct, JJ), (line, NN), (connect, NN), (3, CD), (statue, NN), (gold, NN), (dome, NN), (simple, JJ), (modern, JJ), (stone, NN), (statue, NN), (mary, NN)] | [(grotto, NN), (notre, NNS), (dame, NN)] | [(marian, JJ), (place, NN), (prayer, NN), (reflection, NN)] | [(architecturally, RB, O), (school, NN, O), (catholic, JJ, O), (character, NN, O), (atop, NN, O), (main, JJ, O), (building, NN, O), (gold, NN, O), (dome, NN, O), (golden, JJ, O), (statue, NN, O), (virgin, NN, O), (mary, JJ, O), (immediately, RB, O), (main, JJ, O), (building, NN, O), (face, NN, O), (copper, NN, O), (statue, NN, O), (christ, NN, O), (arm, NN, O), (upraise, NN, O), (legend, VBP, O), (venite, NN, O), (ad, NN, O), (omnes, NNS, O), (main, JJ, O), (building, NN, O), (basilica, NN, O), (sacred, VBD, O), (heart, NN, O), (immediately, RB, O), (basilica, VBD, O), (grotto, JJ, O), (marian, JJ, O), (place, NN, O), (prayer, NN, O), (reflection, NN, O), (replica, NN, O), (grotto, NN, O), (lourdes, VBZ, O), (france, NN, O), (virgin, NN, O), (mary, JJ, O), (reputedly, RB, O), (appear, VBP, O), (saint, JJ, O), (bernadette, NN, O), (soubirous, JJ, O), (1858, CD, O), (end, NN, O), (main, JJ, O), (drive, NN, O), (direct, JJ, O), (line, NN, O), (connect, NN, O), (3, CD, O), (statue, NN, O), (gold, NN, O), (dome, NN, O), (simple, JJ, O), (modern, JJ, O), (stone, NN, O), (statue, NN, O), (mary, NN, O)] | [(grotto, NN, O), (notre, NNS, O), (dame, NN, O)] | [(marian, JJ, O), (place, NN, O), (prayer, NN, O), (reflection, NN, O)] |
| 4 | architecturally school catholic character atop main building gold dome golden statue virgin mary immediately main building face copper statue christ arm upraise legend venite ad omnes main building basilica sacred heart immediately basilica grotto marian place prayer reflection replica grotto lourdes france virgin mary reputedly appear saint bernadette soubirous 1858 end main drive direct line connect 3 statue gold dome simple modern stone statue mary | sit main building notre dame | golden statue virgin mary | [architecturally, school, catholic, character, atop, main, building, gold, dome, golden, statue, virgin, mary, immediately, main, building, face, copper, statue, christ, arm, upraise, legend, venite, ad, omnes, main, building, basilica, sacred, heart, immediately, basilica, grotto, marian, place, prayer, reflection, replica, grotto, lourdes, france, virgin, mary, reputedly, appear, saint, bernadette, soubirous, 1858, end, main, drive, direct, line, connect, 3, statue, gold, dome, simple, modern, stone, statue, mary] | [sit, main, building, notre, dame] | [golden, statue, virgin, mary] | [(architecturally, RB), (school, NN), (catholic, JJ), (character, NN), (atop, NN), (main, JJ), (building, NN), (gold, NN), (dome, NN), (golden, JJ), (statue, NN), (virgin, NN), (mary, JJ), (immediately, RB), (main, JJ), (building, NN), (face, NN), (copper, NN), (statue, NN), (christ, NN), (arm, NN), (upraise, NN), (legend, VBP), (venite, NN), (ad, NN), (omnes, NNS), (main, JJ), (building, NN), (basilica, NN), (sacred, VBD), (heart, NN), (immediately, RB), (basilica, VBD), (grotto, JJ), (marian, JJ), (place, NN), (prayer, NN), (reflection, NN), (replica, NN), (grotto, NN), (lourdes, VBZ), (france, NN), (virgin, NN), (mary, JJ), (reputedly, RB), (appear, VBP), (saint, JJ), (bernadette, NN), (soubirous, JJ), (1858, CD), (end, NN), (main, JJ), (drive, NN), (direct, JJ), (line, NN), (connect, NN), (3, CD), (statue, NN), (gold, NN), (dome, NN), (simple, JJ), (modern, JJ), (stone, NN), (statue, NN), (mary, NN)] | [(sit, NN), (main, JJ), (building, NN), (notre, NN), (dame, NN)] | [(golden, JJ), (statue, NN), (virgin, NN), (mary, NN)] | [(architecturally, RB, O), (school, NN, O), (catholic, JJ, O), (character, NN, O), (atop, NN, O), (main, JJ, O), (building, NN, O), (gold, NN, O), (dome, NN, O), (golden, JJ, O), (statue, NN, O), (virgin, NN, O), (mary, JJ, O), (immediately, RB, O), (main, JJ, O), (building, NN, O), (face, NN, O), (copper, NN, O), (statue, NN, O), (christ, NN, O), (arm, NN, O), (upraise, NN, O), (legend, VBP, O), (venite, NN, O), (ad, NN, O), (omnes, NNS, O), (main, JJ, O), (building, NN, O), (basilica, NN, O), (sacred, VBD, O), (heart, NN, O), (immediately, RB, O), (basilica, VBD, O), (grotto, JJ, O), (marian, JJ, O), (place, NN, O), (prayer, NN, O), (reflection, NN, O), (replica, NN, O), (grotto, NN, O), (lourdes, VBZ, O), (france, NN, O), (virgin, NN, O), (mary, JJ, O), (reputedly, RB, O), (appear, VBP, O), (saint, JJ, O), (bernadette, NN, O), (soubirous, JJ, O), (1858, CD, O), (end, NN, O), (main, JJ, O), (drive, NN, O), (direct, JJ, O), (line, NN, O), (connect, NN, O), (3, CD, O), (statue, NN, O), (gold, NN, O), (dome, NN, O), (simple, JJ, O), (modern, JJ, O), (stone, NN, O), (statue, NN, O), (mary, NN, O)] | [(sit, NN, O), (main, JJ, O), (building, NN, O), (notre, NN, O), (dame, NN, O)] | [(golden, JJ, O), (statue, NN, O), (virgin, NN, O), (mary, NN, O)] |
| 5 | university notre dames student run number news medium outlet studentrun outlet include newspaper radio television station magazine journal begin onepage journal september 1876 scholastic magazine issue twice monthly claim old continuous collegiate publication united states magazine juggler release twice year focus student literature artwork dome yearbook publish annually newspaper vary publication interest observer publish daily mainly report university news staff student notre dame saint marys college unlike scholastic dome observer independent publication faculty advisor editorial oversight university 1987 student believe observer begin conservative bias liberal newspaper common sense publish likewise 2003 student believe paper show liberal bias conservative paper irish rover go production paper publish observer distribute student finally spring 2008 undergraduate journal political science research politic debut | scholastic magazine notre dame begin publish | september 1876 | [university, notre, dames, student, run, number, news, medium, outlet, studentrun, outlet, include, newspaper, radio, television, station, magazine, journal, begin, onepage, journal, september, 1876, scholastic, magazine, issue, twice, monthly, claim, old, continuous, collegiate, publication, united, states, magazine, juggler, release, twice, year, focus, student, literature, artwork, dome, yearbook, publish, annually, newspaper, vary, publication, interest, observer, publish, daily, mainly, report, university, news, staff, student, notre, dame, saint, marys, college, unlike, scholastic, dome, observer, independent, publication, faculty, advisor, editorial, oversight, university, 1987, student, believe, observer, begin, conservative, bias, liberal, newspaper, common, sense, publish, likewise, 2003, student, believe, paper, show, liberal, bias, conservative, paper, irish, ...] | [scholastic, magazine, notre, dame, begin, publish] | [september, 1876] | [(university, NN), (notre, CC), (dames, JJ), (student, NN), (run, VB), (number, NN), (news, NN), (medium, NN), (outlet, NN), (studentrun, VB), (outlet, NN), (include, VBP), (newspaper, NN), (radio, NN), (television, NN), (station, NN), (magazine, NN), (journal, NN), (begin, JJ), (onepage, JJ), (journal, NN), (september, NN), (1876, CD), (scholastic, JJ), (magazine, NN), (issue, NN), (twice, RB), (monthly, JJ), (claim, NN), (old, JJ), (continuous, JJ), (collegiate, NN), (publication, NN), (united, VBD), (states, NNS), (magazine, NN), (juggler, NN), (release, NN), (twice, RB), (year, NN), (focus, NN), (student, NN), (literature, NN), (artwork, VBZ), (dome, JJ), (yearbook, NN), (publish, VB), (annually, RB), (newspaper, NN), (vary, JJ), (publication, NN), (interest, NN), (observer, IN), (publish, JJ), (daily, JJ), (mainly, RB), (report, VBP), (university, JJ), (news, NN), (staff, NN), (student, NN), (notre, NN), (dame, NN), (saint, NN), (marys, NN), (college, NN), (unlike, IN), (scholastic, JJ), (dome, NN), (observer, NN), (independent, JJ), (publication, NN), (faculty, NN), (advisor, NN), (editorial, NN), (oversight, NN), (university, NN), (1987, CD), (student, NN), (believe, VBP), (observer, IN), (begin, VBP), (conservative, JJ), (bias, NN), (liberal, JJ), (newspaper, NN), (common, JJ), (sense, NN), (publish, JJ), (likewise, NN), (2003, CD), (student, NN), (believe, VBP), (paper, NN), (show, NN), (liberal, JJ), (bias, RB), (conservative, JJ), (paper, NN), (irish, NN), ...] | [(scholastic, JJ), (magazine, NN), (notre, NN), (dame, NN), (begin, VBP), (publish, VB)] | [(september, NN), (1876, CD)] | [(university, NN, O), (notre, CC, O), (dames, JJ, O), (student, NN, O), (run, VB, O), (number, NN, O), (news, NN, O), (medium, NN, O), (outlet, NN, O), (studentrun, VB, O), (outlet, NN, O), (include, VBP, O), (newspaper, NN, O), (radio, NN, O), (television, NN, O), (station, NN, O), (magazine, NN, O), (journal, NN, O), (begin, JJ, O), (onepage, JJ, O), (journal, NN, O), (september, NN, O), (1876, CD, O), (scholastic, JJ, O), (magazine, NN, O), (issue, NN, O), (twice, RB, O), (monthly, JJ, O), (claim, NN, O), (old, JJ, O), (continuous, JJ, O), (collegiate, NN, O), (publication, NN, O), (united, VBD, O), (states, NNS, O), (magazine, NN, O), (juggler, NN, O), (release, NN, O), (twice, RB, O), (year, NN, O), (focus, NN, O), (student, NN, O), (literature, NN, O), (artwork, VBZ, O), (dome, JJ, O), (yearbook, NN, O), (publish, VB, O), (annually, RB, O), (newspaper, NN, O), (vary, JJ, O), (publication, NN, O), (interest, NN, O), (observer, IN, O), (publish, JJ, O), (daily, JJ, O), (mainly, RB, O), (report, VBP, O), (university, JJ, O), (news, NN, O), (staff, NN, O), (student, NN, O), (notre, NN, O), (dame, NN, O), (saint, NN, O), (marys, NN, O), (college, NN, O), (unlike, IN, O), (scholastic, JJ, O), (dome, NN, O), (observer, NN, O), (independent, JJ, O), (publication, NN, O), (faculty, NN, O), (advisor, NN, O), (editorial, NN, O), (oversight, NN, O), (university, NN, O), (1987, CD, O), (student, NN, O), (believe, VBP, O), (observer, IN, O), (begin, VBP, O), (conservative, JJ, O), (bias, NN, O), (liberal, JJ, O), (newspaper, NN, O), (common, JJ, O), (sense, NN, O), (publish, JJ, O), (likewise, NN, O), (2003, CD, O), (student, NN, O), (believe, VBP, O), (paper, NN, O), (show, NN, O), (liberal, JJ, O), (bias, RB, O), (conservative, JJ, O), (paper, NN, O), (irish, NN, O), ...] | [(scholastic, JJ, O), (magazine, NN, O), (notre, NN, O), (dame, NN, O), (begin, VBP, O), (publish, VB, O)] | [(september, NN, O), (1876, CD, O)] |
| 6 | university notre dames student run number news medium outlet studentrun outlet include newspaper radio television station magazine journal begin onepage journal september 1876 scholastic magazine issue twice monthly claim old continuous collegiate publication united states magazine juggler release twice year focus student literature artwork dome yearbook publish annually newspaper vary publication interest observer publish daily mainly report university news staff student notre dame saint marys college unlike scholastic dome observer independent publication faculty advisor editorial oversight university 1987 student believe observer begin conservative bias liberal newspaper common sense publish likewise 2003 student believe paper show liberal bias conservative paper irish rover go production paper publish observer distribute student finally spring 2008 undergraduate journal political science research politic debut | notre dames juggler publish | twice | [university, notre, dames, student, run, number, news, medium, outlet, studentrun, outlet, include, newspaper, radio, television, station, magazine, journal, begin, onepage, journal, september, 1876, scholastic, magazine, issue, twice, monthly, claim, old, continuous, collegiate, publication, united, states, magazine, juggler, release, twice, year, focus, student, literature, artwork, dome, yearbook, publish, annually, newspaper, vary, publication, interest, observer, publish, daily, mainly, report, university, news, staff, student, notre, dame, saint, marys, college, unlike, scholastic, dome, observer, independent, publication, faculty, advisor, editorial, oversight, university, 1987, student, believe, observer, begin, conservative, bias, liberal, newspaper, common, sense, publish, likewise, 2003, student, believe, paper, show, liberal, bias, conservative, paper, irish, ...] | [notre, dames, juggler, publish] | [twice] | [(university, NN), (notre, CC), (dames, JJ), (student, NN), (run, VB), (number, NN), (news, NN), (medium, NN), (outlet, NN), (studentrun, VB), (outlet, NN), (include, VBP), (newspaper, NN), (radio, NN), (television, NN), (station, NN), (magazine, NN), (journal, NN), (begin, JJ), (onepage, JJ), (journal, NN), (september, NN), (1876, CD), (scholastic, JJ), (magazine, NN), (issue, NN), (twice, RB), (monthly, JJ), (claim, NN), (old, JJ), (continuous, JJ), (collegiate, NN), (publication, NN), (united, VBD), (states, NNS), (magazine, NN), (juggler, NN), (release, NN), (twice, RB), (year, NN), (focus, NN), (student, NN), (literature, NN), (artwork, VBZ), (dome, JJ), (yearbook, NN), (publish, VB), (annually, RB), (newspaper, NN), (vary, JJ), (publication, NN), (interest, NN), (observer, IN), (publish, JJ), (daily, JJ), (mainly, RB), (report, VBP), (university, JJ), (news, NN), (staff, NN), (student, NN), (notre, NN), (dame, NN), (saint, NN), (marys, NN), (college, NN), (unlike, IN), (scholastic, JJ), (dome, NN), (observer, NN), (independent, JJ), (publication, NN), (faculty, NN), (advisor, NN), (editorial, NN), (oversight, NN), (university, NN), (1987, CD), (student, NN), (believe, VBP), (observer, IN), (begin, VBP), (conservative, JJ), (bias, NN), (liberal, JJ), (newspaper, NN), (common, JJ), (sense, NN), (publish, JJ), (likewise, NN), (2003, CD), (student, NN), (believe, VBP), (paper, NN), (show, NN), (liberal, JJ), (bias, RB), (conservative, JJ), (paper, NN), (irish, NN), ...] | [(notre, JJ), (dames, NNS), (juggler, VBP), (publish, JJ)] | [(twice, RB)] | [(university, NN, O), (notre, CC, O), (dames, JJ, O), (student, NN, O), (run, VB, O), (number, NN, O), (news, NN, O), (medium, NN, O), (outlet, NN, O), (studentrun, VB, O), (outlet, NN, O), (include, VBP, O), (newspaper, NN, O), (radio, NN, O), (television, NN, O), (station, NN, O), (magazine, NN, O), (journal, NN, O), (begin, JJ, O), (onepage, JJ, O), (journal, NN, O), (september, NN, O), (1876, CD, O), (scholastic, JJ, O), (magazine, NN, O), (issue, NN, O), (twice, RB, O), (monthly, JJ, O), (claim, NN, O), (old, JJ, O), (continuous, JJ, O), (collegiate, NN, O), (publication, NN, O), (united, VBD, O), (states, NNS, O), (magazine, NN, O), (juggler, NN, O), (release, NN, O), (twice, RB, O), (year, NN, O), (focus, NN, O), (student, NN, O), (literature, NN, O), (artwork, VBZ, O), (dome, JJ, O), (yearbook, NN, O), (publish, VB, O), (annually, RB, O), (newspaper, NN, O), (vary, JJ, O), (publication, NN, O), (interest, NN, O), (observer, IN, O), (publish, JJ, O), (daily, JJ, O), (mainly, RB, O), (report, VBP, O), (university, JJ, O), (news, NN, O), (staff, NN, O), (student, NN, O), (notre, NN, O), (dame, NN, O), (saint, NN, O), (marys, NN, O), (college, NN, O), (unlike, IN, O), (scholastic, JJ, O), (dome, NN, O), (observer, NN, O), (independent, JJ, O), (publication, NN, O), (faculty, NN, O), (advisor, NN, O), (editorial, NN, O), (oversight, NN, O), (university, NN, O), (1987, CD, O), (student, NN, O), (believe, VBP, O), (observer, IN, O), (begin, VBP, O), (conservative, JJ, O), (bias, NN, O), (liberal, JJ, O), (newspaper, NN, O), (common, JJ, O), (sense, NN, O), (publish, JJ, O), (likewise, NN, O), (2003, CD, O), (student, NN, O), (believe, VBP, O), (paper, NN, O), (show, NN, O), (liberal, JJ, O), (bias, RB, O), (conservative, JJ, O), (paper, NN, O), (irish, NN, O), ...] | [(notre, JJ, O), (dames, NNS, O), (juggler, VBP, O), (publish, JJ, O)] | [(twice, RB, O)] |
| 7 | university notre dames student run number news medium outlet studentrun outlet include newspaper radio television station magazine journal begin onepage journal september 1876 scholastic magazine issue twice monthly claim old continuous collegiate publication united states magazine juggler release twice year focus student literature artwork dome yearbook publish annually newspaper vary publication interest observer publish daily mainly report university news staff student notre dame saint marys college unlike scholastic dome observer independent publication faculty advisor editorial oversight university 1987 student believe observer begin conservative bias liberal newspaper common sense publish likewise 2003 student believe paper show liberal bias conservative paper irish rover go production paper publish observer distribute student finally spring 2008 undergraduate journal political science research politic debut | daily student paper notre dame call | observer | [university, notre, dames, student, run, number, news, medium, outlet, studentrun, outlet, include, newspaper, radio, television, station, magazine, journal, begin, onepage, journal, september, 1876, scholastic, magazine, issue, twice, monthly, claim, old, continuous, collegiate, publication, united, states, magazine, juggler, release, twice, year, focus, student, literature, artwork, dome, yearbook, publish, annually, newspaper, vary, publication, interest, observer, publish, daily, mainly, report, university, news, staff, student, notre, dame, saint, marys, college, unlike, scholastic, dome, observer, independent, publication, faculty, advisor, editorial, oversight, university, 1987, student, believe, observer, begin, conservative, bias, liberal, newspaper, common, sense, publish, likewise, 2003, student, believe, paper, show, liberal, bias, conservative, paper, irish, ...] | [daily, student, paper, notre, dame, call] | [observer] | [(university, NN), (notre, CC), (dames, JJ), (student, NN), (run, VB), (number, NN), (news, NN), (medium, NN), (outlet, NN), (studentrun, VB), (outlet, NN), (include, VBP), (newspaper, NN), (radio, NN), (television, NN), (station, NN), (magazine, NN), (journal, NN), (begin, JJ), (onepage, JJ), (journal, NN), (september, NN), (1876, CD), (scholastic, JJ), (magazine, NN), (issue, NN), (twice, RB), (monthly, JJ), (claim, NN), (old, JJ), (continuous, JJ), (collegiate, NN), (publication, NN), (united, VBD), (states, NNS), (magazine, NN), (juggler, NN), (release, NN), (twice, RB), (year, NN), (focus, NN), (student, NN), (literature, NN), (artwork, VBZ), (dome, JJ), (yearbook, NN), (publish, VB), (annually, RB), (newspaper, NN), (vary, JJ), (publication, NN), (interest, NN), (observer, IN), (publish, JJ), (daily, JJ), (mainly, RB), (report, VBP), (university, JJ), (news, NN), (staff, NN), (student, NN), (notre, NN), (dame, NN), (saint, NN), (marys, NN), (college, NN), (unlike, IN), (scholastic, JJ), (dome, NN), (observer, NN), (independent, JJ), (publication, NN), (faculty, NN), (advisor, NN), (editorial, NN), (oversight, NN), (university, NN), (1987, CD), (student, NN), (believe, VBP), (observer, IN), (begin, VBP), (conservative, JJ), (bias, NN), (liberal, JJ), (newspaper, NN), (common, JJ), (sense, NN), (publish, JJ), (likewise, NN), (2003, CD), (student, NN), (believe, VBP), (paper, NN), (show, NN), (liberal, JJ), (bias, RB), (conservative, JJ), (paper, NN), (irish, NN), ...] | [(daily, JJ), (student, NN), (paper, NN), (notre, FW), (dame, NN), (call, NN)] | [(observer, NN)] | [(university, NN, O), (notre, CC, O), (dames, JJ, O), (student, NN, O), (run, VB, O), (number, NN, O), (news, NN, O), (medium, NN, O), (outlet, NN, O), (studentrun, VB, O), (outlet, NN, O), (include, VBP, O), (newspaper, NN, O), (radio, NN, O), (television, NN, O), (station, NN, O), (magazine, NN, O), (journal, NN, O), (begin, JJ, O), (onepage, JJ, O), (journal, NN, O), (september, NN, O), (1876, CD, O), (scholastic, JJ, O), (magazine, NN, O), (issue, NN, O), (twice, RB, O), (monthly, JJ, O), (claim, NN, O), (old, JJ, O), (continuous, JJ, O), (collegiate, NN, O), (publication, NN, O), (united, VBD, O), (states, NNS, O), (magazine, NN, O), (juggler, NN, O), (release, NN, O), (twice, RB, O), (year, NN, O), (focus, NN, O), (student, NN, O), (literature, NN, O), (artwork, VBZ, O), (dome, JJ, O), (yearbook, NN, O), (publish, VB, O), (annually, RB, O), (newspaper, NN, O), (vary, JJ, O), (publication, NN, O), (interest, NN, O), (observer, IN, O), (publish, JJ, O), (daily, JJ, O), (mainly, RB, O), (report, VBP, O), (university, JJ, O), (news, NN, O), (staff, NN, O), (student, NN, O), (notre, NN, O), (dame, NN, O), (saint, NN, O), (marys, NN, O), (college, NN, O), (unlike, IN, O), (scholastic, JJ, O), (dome, NN, O), (observer, NN, O), (independent, JJ, O), (publication, NN, O), (faculty, NN, O), (advisor, NN, O), (editorial, NN, O), (oversight, NN, O), (university, NN, O), (1987, CD, O), (student, NN, O), (believe, VBP, O), (observer, IN, O), (begin, VBP, O), (conservative, JJ, O), (bias, NN, O), (liberal, JJ, O), (newspaper, NN, O), (common, JJ, O), (sense, NN, O), (publish, JJ, O), (likewise, NN, O), (2003, CD, O), (student, NN, O), (believe, VBP, O), (paper, NN, O), (show, NN, O), (liberal, JJ, O), (bias, RB, O), (conservative, JJ, O), (paper, NN, O), (irish, NN, O), ...] | [(daily, JJ, O), (student, NN, O), (paper, NN, O), (notre, FW, O), (dame, NN, O), (call, NN, O)] | [(observer, NN, O)] |
| 8 | university notre dames student run number news medium outlet studentrun outlet include newspaper radio television station magazine journal begin onepage journal september 1876 scholastic magazine issue twice monthly claim old continuous collegiate publication united states magazine juggler release twice year focus student literature artwork dome yearbook publish annually newspaper vary publication interest observer publish daily mainly report university news staff student notre dame saint marys college unlike scholastic dome observer independent publication faculty advisor editorial oversight university 1987 student believe observer begin conservative bias liberal newspaper common sense publish likewise 2003 student believe paper show liberal bias conservative paper irish rover go production paper publish observer distribute student finally spring 2008 undergraduate journal political science research politic debut | student news paper find notre dame | [university, notre, dames, student, run, number, news, medium, outlet, studentrun, outlet, include, newspaper, radio, television, station, magazine, journal, begin, onepage, journal, september, 1876, scholastic, magazine, issue, twice, monthly, claim, old, continuous, collegiate, publication, united, states, magazine, juggler, release, twice, year, focus, student, literature, artwork, dome, yearbook, publish, annually, newspaper, vary, publication, interest, observer, publish, daily, mainly, report, university, news, staff, student, notre, dame, saint, marys, college, unlike, scholastic, dome, observer, independent, publication, faculty, advisor, editorial, oversight, university, 1987, student, believe, observer, begin, conservative, bias, liberal, newspaper, common, sense, publish, likewise, 2003, student, believe, paper, show, liberal, bias, conservative, paper, irish, ...] | [student, news, paper, find, notre, dame] | [] | [(university, NN), (notre, CC), (dames, JJ), (student, NN), (run, VB), (number, NN), (news, NN), (medium, NN), (outlet, NN), (studentrun, VB), (outlet, NN), (include, VBP), (newspaper, NN), (radio, NN), (television, NN), (station, NN), (magazine, NN), (journal, NN), (begin, JJ), (onepage, JJ), (journal, NN), (september, NN), (1876, CD), (scholastic, JJ), (magazine, NN), (issue, NN), (twice, RB), (monthly, JJ), (claim, NN), (old, JJ), (continuous, JJ), (collegiate, NN), (publication, NN), (united, VBD), (states, NNS), (magazine, NN), (juggler, NN), (release, NN), (twice, RB), (year, NN), (focus, NN), (student, NN), (literature, NN), (artwork, VBZ), (dome, JJ), (yearbook, NN), (publish, VB), (annually, RB), (newspaper, NN), (vary, JJ), (publication, NN), (interest, NN), (observer, IN), (publish, JJ), (daily, JJ), (mainly, RB), (report, VBP), (university, JJ), (news, NN), (staff, NN), (student, NN), (notre, NN), (dame, NN), (saint, NN), (marys, NN), (college, NN), (unlike, IN), (scholastic, JJ), (dome, NN), (observer, NN), (independent, JJ), (publication, NN), (faculty, NN), (advisor, NN), (editorial, NN), (oversight, NN), (university, NN), (1987, CD), (student, NN), (believe, VBP), (observer, IN), (begin, VBP), (conservative, JJ), (bias, NN), (liberal, JJ), (newspaper, NN), (common, JJ), (sense, NN), (publish, JJ), (likewise, NN), (2003, CD), (student, NN), (believe, VBP), (paper, NN), (show, NN), (liberal, JJ), (bias, RB), (conservative, JJ), (paper, NN), (irish, NN), ...] | [(student, NN), (news, NN), (paper, NN), (find, VBP), (notre, JJ), (dame, NN)] | [] | [(university, NN, O), (notre, CC, O), (dames, JJ, O), (student, NN, O), (run, VB, O), (number, NN, O), (news, NN, O), (medium, NN, O), (outlet, NN, O), (studentrun, VB, O), (outlet, NN, O), (include, VBP, O), (newspaper, NN, O), (radio, NN, O), (television, NN, O), (station, NN, O), (magazine, NN, O), (journal, NN, O), (begin, JJ, O), (onepage, JJ, O), (journal, NN, O), (september, NN, O), (1876, CD, O), (scholastic, JJ, O), (magazine, NN, O), (issue, NN, O), (twice, RB, O), (monthly, JJ, O), (claim, NN, O), (old, JJ, O), (continuous, JJ, O), (collegiate, NN, O), (publication, NN, O), (united, VBD, O), (states, NNS, O), (magazine, NN, O), (juggler, NN, O), (release, NN, O), (twice, RB, O), (year, NN, O), (focus, NN, O), (student, NN, O), (literature, NN, O), (artwork, VBZ, O), (dome, JJ, O), (yearbook, NN, O), (publish, VB, O), (annually, RB, O), (newspaper, NN, O), (vary, JJ, O), (publication, NN, O), (interest, NN, O), (observer, IN, O), (publish, JJ, O), (daily, JJ, O), (mainly, RB, O), (report, VBP, O), (university, JJ, O), (news, NN, O), (staff, NN, O), (student, NN, O), (notre, NN, O), (dame, NN, O), (saint, NN, O), (marys, NN, O), (college, NN, O), (unlike, IN, O), (scholastic, JJ, O), (dome, NN, O), (observer, NN, O), (independent, JJ, O), (publication, NN, O), (faculty, NN, O), (advisor, NN, O), (editorial, NN, O), (oversight, NN, O), (university, NN, O), (1987, CD, O), (student, NN, O), (believe, VBP, O), (observer, IN, O), (begin, VBP, O), (conservative, JJ, O), (bias, NN, O), (liberal, JJ, O), (newspaper, NN, O), (common, JJ, O), (sense, NN, O), (publish, JJ, O), (likewise, NN, O), (2003, CD, O), (student, NN, O), (believe, VBP, O), (paper, NN, O), (show, NN, O), (liberal, JJ, O), (bias, RB, O), (conservative, JJ, O), (paper, NN, O), (irish, NN, O), ...] | [(student, NN, O), (news, NN, O), (paper, NN, O), (find, VBP, O), (notre, JJ, O), (dame, NN, O)] | [] | |
| 9 | university notre dames student run number news medium outlet studentrun outlet include newspaper radio television station magazine journal begin onepage journal september 1876 scholastic magazine issue twice monthly claim old continuous collegiate publication united states magazine juggler release twice year focus student literature artwork dome yearbook publish annually newspaper vary publication interest observer publish daily mainly report university news staff student notre dame saint marys college unlike scholastic dome observer independent publication faculty advisor editorial oversight university 1987 student believe observer begin conservative bias liberal newspaper common sense publish likewise 2003 student believe paper show liberal bias conservative paper irish rover go production paper publish observer distribute student finally spring 2008 undergraduate journal political science research politic debut | year student paper common sense begin publication notre dame | 1987 | [university, notre, dames, student, run, number, news, medium, outlet, studentrun, outlet, include, newspaper, radio, television, station, magazine, journal, begin, onepage, journal, september, 1876, scholastic, magazine, issue, twice, monthly, claim, old, continuous, collegiate, publication, united, states, magazine, juggler, release, twice, year, focus, student, literature, artwork, dome, yearbook, publish, annually, newspaper, vary, publication, interest, observer, publish, daily, mainly, report, university, news, staff, student, notre, dame, saint, marys, college, unlike, scholastic, dome, observer, independent, publication, faculty, advisor, editorial, oversight, university, 1987, student, believe, observer, begin, conservative, bias, liberal, newspaper, common, sense, publish, likewise, 2003, student, believe, paper, show, liberal, bias, conservative, paper, irish, ...] | [year, student, paper, common, sense, begin, publication, notre, dame] | [1987] | [(university, NN), (notre, CC), (dames, JJ), (student, NN), (run, VB), (number, NN), (news, NN), (medium, NN), (outlet, NN), (studentrun, VB), (outlet, NN), (include, VBP), (newspaper, NN), (radio, NN), (television, NN), (station, NN), (magazine, NN), (journal, NN), (begin, JJ), (onepage, JJ), (journal, NN), (september, NN), (1876, CD), (scholastic, JJ), (magazine, NN), (issue, NN), (twice, RB), (monthly, JJ), (claim, NN), (old, JJ), (continuous, JJ), (collegiate, NN), (publication, NN), (united, VBD), (states, NNS), (magazine, NN), (juggler, NN), (release, NN), (twice, RB), (year, NN), (focus, NN), (student, NN), (literature, NN), (artwork, VBZ), (dome, JJ), (yearbook, NN), (publish, VB), (annually, RB), (newspaper, NN), (vary, JJ), (publication, NN), (interest, NN), (observer, IN), (publish, JJ), (daily, JJ), (mainly, RB), (report, VBP), (university, JJ), (news, NN), (staff, NN), (student, NN), (notre, NN), (dame, NN), (saint, NN), (marys, NN), (college, NN), (unlike, IN), (scholastic, JJ), (dome, NN), (observer, NN), (independent, JJ), (publication, NN), (faculty, NN), (advisor, NN), (editorial, NN), (oversight, NN), (university, NN), (1987, CD), (student, NN), (believe, VBP), (observer, IN), (begin, VBP), (conservative, JJ), (bias, NN), (liberal, JJ), (newspaper, NN), (common, JJ), (sense, NN), (publish, JJ), (likewise, NN), (2003, CD), (student, NN), (believe, VBP), (paper, NN), (show, NN), (liberal, JJ), (bias, RB), (conservative, JJ), (paper, NN), (irish, NN), ...] | [(year, NN), (student, NN), (paper, NN), (common, JJ), (sense, NN), (begin, NN), (publication, NN), (notre, FW), (dame, NN)] | [(1987, CD)] | [(university, NN, O), (notre, CC, O), (dames, JJ, O), (student, NN, O), (run, VB, O), (number, NN, O), (news, NN, O), (medium, NN, O), (outlet, NN, O), (studentrun, VB, O), (outlet, NN, O), (include, VBP, O), (newspaper, NN, O), (radio, NN, O), (television, NN, O), (station, NN, O), (magazine, NN, O), (journal, NN, O), (begin, JJ, O), (onepage, JJ, O), (journal, NN, O), (september, NN, O), (1876, CD, O), (scholastic, JJ, O), (magazine, NN, O), (issue, NN, O), (twice, RB, O), (monthly, JJ, O), (claim, NN, O), (old, JJ, O), (continuous, JJ, O), (collegiate, NN, O), (publication, NN, O), (united, VBD, O), (states, NNS, O), (magazine, NN, O), (juggler, NN, O), (release, NN, O), (twice, RB, O), (year, NN, O), (focus, NN, O), (student, NN, O), (literature, NN, O), (artwork, VBZ, O), (dome, JJ, O), (yearbook, NN, O), (publish, VB, O), (annually, RB, O), (newspaper, NN, O), (vary, JJ, O), (publication, NN, O), (interest, NN, O), (observer, IN, O), (publish, JJ, O), (daily, JJ, O), (mainly, RB, O), (report, VBP, O), (university, JJ, O), (news, NN, O), (staff, NN, O), (student, NN, O), (notre, NN, O), (dame, NN, O), (saint, NN, O), (marys, NN, O), (college, NN, O), (unlike, IN, O), (scholastic, JJ, O), (dome, NN, O), (observer, NN, O), (independent, JJ, O), (publication, NN, O), (faculty, NN, O), (advisor, NN, O), (editorial, NN, O), (oversight, NN, O), (university, NN, O), (1987, CD, O), (student, NN, O), (believe, VBP, O), (observer, IN, O), (begin, VBP, O), (conservative, JJ, O), (bias, NN, O), (liberal, JJ, O), (newspaper, NN, O), (common, JJ, O), (sense, NN, O), (publish, JJ, O), (likewise, NN, O), (2003, CD, O), (student, NN, O), (believe, VBP, O), (paper, NN, O), (show, NN, O), (liberal, JJ, O), (bias, RB, O), (conservative, JJ, O), (paper, NN, O), (irish, NN, O), ...] | [(year, NN, O), (student, NN, O), (paper, NN, O), (common, JJ, O), (sense, NN, O), (begin, NN, O), (publication, NN, O), (notre, FW, O), (dame, NN, O)] | [(1987, CD, O)] |
| 10 | university major seat congregation holy cross albeit official headquarters rome main seminary moreau seminary locate campus st joseph lake main building old college old building campus locate near shore st mary lake houses undergraduate seminarian retire priest brother reside fatima house retreat center holy cross house columba hall near grotto university moreau seminary tie theologian frederick buechner catholic buechner praise writer notre dame moreau seminary create buechner prize preach | headquarters congregation holy cross | rome | [university, major, seat, congregation, holy, cross, albeit, official, headquarters, rome, main, seminary, moreau, seminary, locate, campus, st, joseph, lake, main, building, old, college, old, building, campus, locate, near, shore, st, mary, lake, houses, undergraduate, seminarian, retire, priest, brother, reside, fatima, house, retreat, center, holy, cross, house, columba, hall, near, grotto, university, moreau, seminary, tie, theologian, frederick, buechner, catholic, buechner, praise, writer, notre, dame, moreau, seminary, create, buechner, prize, preach] | [headquarters, congregation, holy, cross] | [rome] | [(university, NN), (major, JJ), (seat, NN), (congregation, NN), (holy, NN), (cross, NN), (albeit, IN), (official, JJ), (headquarters, NNS), (rome, VBP), (main, JJ), (seminary, JJ), (moreau, NN), (seminary, JJ), (locate, NN), (campus, NN), (st, NN), (joseph, NN), (lake, VBP), (main, JJ), (building, NN), (old, JJ), (college, NN), (old, JJ), (building, NN), (campus, NN), (locate, NN), (near, IN), (shore, NN), (st, NN), (mary, JJ), (lake, NN), (houses, NNS), (undergraduate, VBP), (seminarian, JJ), (retire, NN), (priest, NN), (brother, NN), (reside, NN), (fatima, NN), (house, NN), (retreat, NN), (center, NN), (holy, NN), (cross, NN), (house, NN), (columba, VBD), (hall, NN), (near, IN), (grotto, NN), (university, NN), (moreau, NN), (seminary, JJ), (tie, NN), (theologian, JJ), (frederick, NN), (buechner, NN), (catholic, JJ), (buechner, NN), (praise, NN), (writer, NN), (notre, JJ), (dame, NN), (moreau, NN), (seminary, JJ), (create, NN), (buechner, NN), (prize, VB), (preach, NN)] | [(headquarters, NNS), (congregation, VBP), (holy, JJ), (cross, NN)] | [(rome, NN)] | [(university, NN, O), (major, JJ, O), (seat, NN, O), (congregation, NN, O), (holy, NN, O), (cross, NN, O), (albeit, IN, O), (official, JJ, O), (headquarters, NNS, O), (rome, VBP, O), (main, JJ, O), (seminary, JJ, O), (moreau, NN, O), (seminary, JJ, O), (locate, NN, O), (campus, NN, O), (st, NN, O), (joseph, NN, O), (lake, VBP, O), (main, JJ, O), (building, NN, O), (old, JJ, O), (college, NN, O), (old, JJ, O), (building, NN, O), (campus, NN, O), (locate, NN, O), (near, IN, O), (shore, NN, O), (st, NN, O), (mary, JJ, O), (lake, NN, O), (houses, NNS, O), (undergraduate, VBP, O), (seminarian, JJ, O), (retire, NN, O), (priest, NN, O), (brother, NN, O), (reside, NN, O), (fatima, NN, O), (house, NN, O), (retreat, NN, O), (center, NN, O), (holy, NN, O), (cross, NN, O), (house, NN, O), (columba, VBD, O), (hall, NN, O), (near, IN, O), (grotto, NN, O), (university, NN, O), (moreau, NN, O), (seminary, JJ, O), (tie, NN, O), (theologian, JJ, O), (frederick, NN, O), (buechner, NN, O), (catholic, JJ, O), (buechner, NN, O), (praise, NN, O), (writer, NN, O), (notre, JJ, O), (dame, NN, O), (moreau, NN, O), (seminary, JJ, O), (create, NN, O), (buechner, NN, O), (prize, VB, O), (preach, NN, O)] | [(headquarters, NNS, O), (congregation, VBP, O), (holy, JJ, O), (cross, NN, O)] | [(rome, NN, O)] |
| 11 | university major seat congregation holy cross albeit official headquarters rome main seminary moreau seminary locate campus st joseph lake main building old college old building campus locate near shore st mary lake houses undergraduate seminarian retire priest brother reside fatima house retreat center holy cross house columba hall near grotto university moreau seminary tie theologian frederick buechner catholic buechner praise writer notre dame moreau seminary create buechner prize preach | primary seminary congregation holy cross | moreau seminary | [university, major, seat, congregation, holy, cross, albeit, official, headquarters, rome, main, seminary, moreau, seminary, locate, campus, st, joseph, lake, main, building, old, college, old, building, campus, locate, near, shore, st, mary, lake, houses, undergraduate, seminarian, retire, priest, brother, reside, fatima, house, retreat, center, holy, cross, house, columba, hall, near, grotto, university, moreau, seminary, tie, theologian, frederick, buechner, catholic, buechner, praise, writer, notre, dame, moreau, seminary, create, buechner, prize, preach] | [primary, seminary, congregation, holy, cross] | [moreau, seminary] | [(university, NN), (major, JJ), (seat, NN), (congregation, NN), (holy, NN), (cross, NN), (albeit, IN), (official, JJ), (headquarters, NNS), (rome, VBP), (main, JJ), (seminary, JJ), (moreau, NN), (seminary, JJ), (locate, NN), (campus, NN), (st, NN), (joseph, NN), (lake, VBP), (main, JJ), (building, NN), (old, JJ), (college, NN), (old, JJ), (building, NN), (campus, NN), (locate, NN), (near, IN), (shore, NN), (st, NN), (mary, JJ), (lake, NN), (houses, NNS), (undergraduate, VBP), (seminarian, JJ), (retire, NN), (priest, NN), (brother, NN), (reside, NN), (fatima, NN), (house, NN), (retreat, NN), (center, NN), (holy, NN), (cross, NN), (house, NN), (columba, VBD), (hall, NN), (near, IN), (grotto, NN), (university, NN), (moreau, NN), (seminary, JJ), (tie, NN), (theologian, JJ), (frederick, NN), (buechner, NN), (catholic, JJ), (buechner, NN), (praise, NN), (writer, NN), (notre, JJ), (dame, NN), (moreau, NN), (seminary, JJ), (create, NN), (buechner, NN), (prize, VB), (preach, NN)] | [(primary, JJ), (seminary, JJ), (congregation, NN), (holy, NN), (cross, NN)] | [(moreau, NN), (seminary, NN)] | [(university, NN, O), (major, JJ, O), (seat, NN, O), (congregation, NN, O), (holy, NN, O), (cross, NN, O), (albeit, IN, O), (official, JJ, O), (headquarters, NNS, O), (rome, VBP, O), (main, JJ, O), (seminary, JJ, O), (moreau, NN, O), (seminary, JJ, O), (locate, NN, O), (campus, NN, O), (st, NN, O), (joseph, NN, O), (lake, VBP, O), (main, JJ, O), (building, NN, O), (old, JJ, O), (college, NN, O), (old, JJ, O), (building, NN, O), (campus, NN, O), (locate, NN, O), (near, IN, O), (shore, NN, O), (st, NN, O), (mary, JJ, O), (lake, NN, O), (houses, NNS, O), (undergraduate, VBP, O), (seminarian, JJ, O), (retire, NN, O), (priest, NN, O), (brother, NN, O), (reside, NN, O), (fatima, NN, O), (house, NN, O), (retreat, NN, O), (center, NN, O), (holy, NN, O), (cross, NN, O), (house, NN, O), (columba, VBD, O), (hall, NN, O), (near, IN, O), (grotto, NN, O), (university, NN, O), (moreau, NN, O), (seminary, JJ, O), (tie, NN, O), (theologian, JJ, O), (frederick, NN, O), (buechner, NN, O), (catholic, JJ, O), (buechner, NN, O), (praise, NN, O), (writer, NN, O), (notre, JJ, O), (dame, NN, O), (moreau, NN, O), (seminary, JJ, O), (create, NN, O), (buechner, NN, O), (prize, VB, O), (preach, NN, O)] | [(primary, JJ, O), (seminary, JJ, O), (congregation, NN, O), (holy, NN, O), (cross, NN, O)] | [(moreau, NN, O), (seminary, NN, O)] |
| 12 | university major seat congregation holy cross albeit official headquarters rome main seminary moreau seminary locate campus st joseph lake main building old college old building campus locate near shore st mary lake houses undergraduate seminarian retire priest brother reside fatima house retreat center holy cross house columba hall near grotto university moreau seminary tie theologian frederick buechner catholic buechner praise writer notre dame moreau seminary create buechner prize preach | old structure notre dame | old college | [university, major, seat, congregation, holy, cross, albeit, official, headquarters, rome, main, seminary, moreau, seminary, locate, campus, st, joseph, lake, main, building, old, college, old, building, campus, locate, near, shore, st, mary, lake, houses, undergraduate, seminarian, retire, priest, brother, reside, fatima, house, retreat, center, holy, cross, house, columba, hall, near, grotto, university, moreau, seminary, tie, theologian, frederick, buechner, catholic, buechner, praise, writer, notre, dame, moreau, seminary, create, buechner, prize, preach] | [old, structure, notre, dame] | [old, college] | [(university, NN), (major, JJ), (seat, NN), (congregation, NN), (holy, NN), (cross, NN), (albeit, IN), (official, JJ), (headquarters, NNS), (rome, VBP), (main, JJ), (seminary, JJ), (moreau, NN), (seminary, JJ), (locate, NN), (campus, NN), (st, NN), (joseph, NN), (lake, VBP), (main, JJ), (building, NN), (old, JJ), (college, NN), (old, JJ), (building, NN), (campus, NN), (locate, NN), (near, IN), (shore, NN), (st, NN), (mary, JJ), (lake, NN), (houses, NNS), (undergraduate, VBP), (seminarian, JJ), (retire, NN), (priest, NN), (brother, NN), (reside, NN), (fatima, NN), (house, NN), (retreat, NN), (center, NN), (holy, NN), (cross, NN), (house, NN), (columba, VBD), (hall, NN), (near, IN), (grotto, NN), (university, NN), (moreau, NN), (seminary, JJ), (tie, NN), (theologian, JJ), (frederick, NN), (buechner, NN), (catholic, JJ), (buechner, NN), (praise, NN), (writer, NN), (notre, JJ), (dame, NN), (moreau, NN), (seminary, JJ), (create, NN), (buechner, NN), (prize, VB), (preach, NN)] | [(old, JJ), (structure, NN), (notre, NN), (dame, NN)] | [(old, JJ), (college, NN)] | [(university, NN, O), (major, JJ, O), (seat, NN, O), (congregation, NN, O), (holy, NN, O), (cross, NN, O), (albeit, IN, O), (official, JJ, O), (headquarters, NNS, O), (rome, VBP, O), (main, JJ, O), (seminary, JJ, O), (moreau, NN, O), (seminary, JJ, O), (locate, NN, O), (campus, NN, O), (st, NN, O), (joseph, NN, O), (lake, VBP, O), (main, JJ, O), (building, NN, O), (old, JJ, O), (college, NN, O), (old, JJ, O), (building, NN, O), (campus, NN, O), (locate, NN, O), (near, IN, O), (shore, NN, O), (st, NN, O), (mary, JJ, O), (lake, NN, O), (houses, NNS, O), (undergraduate, VBP, O), (seminarian, JJ, O), (retire, NN, O), (priest, NN, O), (brother, NN, O), (reside, NN, O), (fatima, NN, O), (house, NN, O), (retreat, NN, O), (center, NN, O), (holy, NN, O), (cross, NN, O), (house, NN, O), (columba, VBD, O), (hall, NN, O), (near, IN, O), (grotto, NN, O), (university, NN, O), (moreau, NN, O), (seminary, JJ, O), (tie, NN, O), (theologian, JJ, O), (frederick, NN, O), (buechner, NN, O), (catholic, JJ, O), (buechner, NN, O), (praise, NN, O), (writer, NN, O), (notre, JJ, O), (dame, NN, O), (moreau, NN, O), (seminary, JJ, O), (create, NN, O), (buechner, NN, O), (prize, VB, O), (preach, NN, O)] | [(old, JJ, O), (structure, NN, O), (notre, NN, O), (dame, NN, O)] | [(old, JJ, O), (college, NN, O)] |
| 13 | university major seat congregation holy cross albeit official headquarters rome main seminary moreau seminary locate campus st joseph lake main building old college old building campus locate near shore st mary lake houses undergraduate seminarian retire priest brother reside fatima house retreat center holy cross house columba hall near grotto university moreau seminary tie theologian frederick buechner catholic buechner praise writer notre dame moreau seminary create buechner prize preach | individual live fatima house notre dame | retired priest brother | [university, major, seat, congregation, holy, cross, albeit, official, headquarters, rome, main, seminary, moreau, seminary, locate, campus, st, joseph, lake, main, building, old, college, old, building, campus, locate, near, shore, st, mary, lake, houses, undergraduate, seminarian, retire, priest, brother, reside, fatima, house, retreat, center, holy, cross, house, columba, hall, near, grotto, university, moreau, seminary, tie, theologian, frederick, buechner, catholic, buechner, praise, writer, notre, dame, moreau, seminary, create, buechner, prize, preach] | [individual, live, fatima, house, notre, dame] | [retired, priest, brother] | [(university, NN), (major, JJ), (seat, NN), (congregation, NN), (holy, NN), (cross, NN), (albeit, IN), (official, JJ), (headquarters, NNS), (rome, VBP), (main, JJ), (seminary, JJ), (moreau, NN), (seminary, JJ), (locate, NN), (campus, NN), (st, NN), (joseph, NN), (lake, VBP), (main, JJ), (building, NN), (old, JJ), (college, NN), (old, JJ), (building, NN), (campus, NN), (locate, NN), (near, IN), (shore, NN), (st, NN), (mary, JJ), (lake, NN), (houses, NNS), (undergraduate, VBP), (seminarian, JJ), (retire, NN), (priest, NN), (brother, NN), (reside, NN), (fatima, NN), (house, NN), (retreat, NN), (center, NN), (holy, NN), (cross, NN), (house, NN), (columba, VBD), (hall, NN), (near, IN), (grotto, NN), (university, NN), (moreau, NN), (seminary, JJ), (tie, NN), (theologian, JJ), (frederick, NN), (buechner, NN), (catholic, JJ), (buechner, NN), (praise, NN), (writer, NN), (notre, JJ), (dame, NN), (moreau, NN), (seminary, JJ), (create, NN), (buechner, NN), (prize, VB), (preach, NN)] | [(individual, JJ), (live, JJ), (fatima, NN), (house, NN), (notre, NN), (dame, NN)] | [(retired, VBN), (priest, JJS), (brother, NN)] | [(university, NN, O), (major, JJ, O), (seat, NN, O), (congregation, NN, O), (holy, NN, O), (cross, NN, O), (albeit, IN, O), (official, JJ, O), (headquarters, NNS, O), (rome, VBP, O), (main, JJ, O), (seminary, JJ, O), (moreau, NN, O), (seminary, JJ, O), (locate, NN, O), (campus, NN, O), (st, NN, O), (joseph, NN, O), (lake, VBP, O), (main, JJ, O), (building, NN, O), (old, JJ, O), (college, NN, O), (old, JJ, O), (building, NN, O), (campus, NN, O), (locate, NN, O), (near, IN, O), (shore, NN, O), (st, NN, O), (mary, JJ, O), (lake, NN, O), (houses, NNS, O), (undergraduate, VBP, O), (seminarian, JJ, O), (retire, NN, O), (priest, NN, O), (brother, NN, O), (reside, NN, O), (fatima, NN, O), (house, NN, O), (retreat, NN, O), (center, NN, O), (holy, NN, O), (cross, NN, O), (house, NN, O), (columba, VBD, O), (hall, NN, O), (near, IN, O), (grotto, NN, O), (university, NN, O), (moreau, NN, O), (seminary, JJ, O), (tie, NN, O), (theologian, JJ, O), (frederick, NN, O), (buechner, NN, O), (catholic, JJ, O), (buechner, NN, O), (praise, NN, O), (writer, NN, O), (notre, JJ, O), (dame, NN, O), (moreau, NN, O), (seminary, JJ, O), (create, NN, O), (buechner, NN, O), (prize, VB, O), (preach, NN, O)] | [(individual, JJ, O), (live, JJ, O), (fatima, NN, O), (house, NN, O), (notre, NN, O), (dame, NN, O)] | [(retired, VBN, O), (priest, JJS, O), (brother, NN, O)] |
| 14 | university major seat congregation holy cross albeit official headquarters rome main seminary moreau seminary locate campus st joseph lake main building old college old building campus locate near shore st mary lake houses undergraduate seminarian retire priest brother reside fatima house retreat center holy cross house columba hall near grotto university moreau seminary tie theologian frederick buechner catholic buechner praise writer notre dame moreau seminary create buechner prize preach | prize frederick buechner create | buechner prize preach | [university, major, seat, congregation, holy, cross, albeit, official, headquarters, rome, main, seminary, moreau, seminary, locate, campus, st, joseph, lake, main, building, old, college, old, building, campus, locate, near, shore, st, mary, lake, houses, undergraduate, seminarian, retire, priest, brother, reside, fatima, house, retreat, center, holy, cross, house, columba, hall, near, grotto, university, moreau, seminary, tie, theologian, frederick, buechner, catholic, buechner, praise, writer, notre, dame, moreau, seminary, create, buechner, prize, preach] | [prize, frederick, buechner, create] | [buechner, prize, preach] | [(university, NN), (major, JJ), (seat, NN), (congregation, NN), (holy, NN), (cross, NN), (albeit, IN), (official, JJ), (headquarters, NNS), (rome, VBP), (main, JJ), (seminary, JJ), (moreau, NN), (seminary, JJ), (locate, NN), (campus, NN), (st, NN), (joseph, NN), (lake, VBP), (main, JJ), (building, NN), (old, JJ), (college, NN), (old, JJ), (building, NN), (campus, NN), (locate, NN), (near, IN), (shore, NN), (st, NN), (mary, JJ), (lake, NN), (houses, NNS), (undergraduate, VBP), (seminarian, JJ), (retire, NN), (priest, NN), (brother, NN), (reside, NN), (fatima, NN), (house, NN), (retreat, NN), (center, NN), (holy, NN), (cross, NN), (house, NN), (columba, VBD), (hall, NN), (near, IN), (grotto, NN), (university, NN), (moreau, NN), (seminary, JJ), (tie, NN), (theologian, JJ), (frederick, NN), (buechner, NN), (catholic, JJ), (buechner, NN), (praise, NN), (writer, NN), (notre, JJ), (dame, NN), (moreau, NN), (seminary, JJ), (create, NN), (buechner, NN), (prize, VB), (preach, NN)] | [(prize, VB), (frederick, JJ), (buechner, NN), (create, NN)] | [(buechner, NN), (prize, VB), (preach, NN)] | [(university, NN, O), (major, JJ, O), (seat, NN, O), (congregation, NN, O), (holy, NN, O), (cross, NN, O), (albeit, IN, O), (official, JJ, O), (headquarters, NNS, O), (rome, VBP, O), (main, JJ, O), (seminary, JJ, O), (moreau, NN, O), (seminary, JJ, O), (locate, NN, O), (campus, NN, O), (st, NN, O), (joseph, NN, O), (lake, VBP, O), (main, JJ, O), (building, NN, O), (old, JJ, O), (college, NN, O), (old, JJ, O), (building, NN, O), (campus, NN, O), (locate, NN, O), (near, IN, O), (shore, NN, O), (st, NN, O), (mary, JJ, O), (lake, NN, O), (houses, NNS, O), (undergraduate, VBP, O), (seminarian, JJ, O), (retire, NN, O), (priest, NN, O), (brother, NN, O), (reside, NN, O), (fatima, NN, O), (house, NN, O), (retreat, NN, O), (center, NN, O), (holy, NN, O), (cross, NN, O), (house, NN, O), (columba, VBD, O), (hall, NN, O), (near, IN, O), (grotto, NN, O), (university, NN, O), (moreau, NN, O), (seminary, JJ, O), (tie, NN, O), (theologian, JJ, O), (frederick, NN, O), (buechner, NN, O), (catholic, JJ, O), (buechner, NN, O), (praise, NN, O), (writer, NN, O), (notre, JJ, O), (dame, NN, O), (moreau, NN, O), (seminary, JJ, O), (create, NN, O), (buechner, NN, O), (prize, VB, O), (preach, NN, O)] | [(prize, VB, O), (frederick, JJ, O), (buechner, NN, O), (create, NN, O)] | [(buechner, NN, O), (prize, VB, O), (preach, NN, O)] |
| 15 | college engineering establish 1920 early course civil mechanical engineering college science 1870s today college house fitzpatrick cushing stinsonremick hall engineering include department study aerospace mechanical engineering chemical biomolecular engineering civil engineering geological science computer science engineering electrical engineering bs degree offer additionally college offer fiveyear dual degree program college art letter business award additional ba master business administration mba degree respectively | bs level degree offer college engineering notre dame | [college, engineering, establish, 1920, early, course, civil, mechanical, engineering, college, science, 1870s, today, college, house, fitzpatrick, cushing, stinsonremick, hall, engineering, include, department, study, aerospace, mechanical, engineering, chemical, biomolecular, engineering, civil, engineering, geological, science, computer, science, engineering, electrical, engineering, bs, degree, offer, additionally, college, offer, fiveyear, dual, degree, program, college, art, letter, business, award, additional, ba, master, business, administration, mba, degree, respectively] | [bs, level, degree, offer, college, engineering, notre, dame] | [] | [(college, NN), (engineering, NN), (establish, VB), (1920, CD), (early, JJ), (course, NN), (civil, JJ), (mechanical, JJ), (engineering, NN), (college, NN), (science, NN), (1870s, CD), (today, NN), (college, NN), (house, NN), (fitzpatrick, JJ), (cushing, VBG), (stinsonremick, JJ), (hall, NN), (engineering, NN), (include, VBP), (department, NN), (study, NN), (aerospace, VBP), (mechanical, JJ), (engineering, NN), (chemical, NN), (biomolecular, JJ), (engineering, NN), (civil, JJ), (engineering, NN), (geological, JJ), (science, NN), (computer, NN), (science, NN), (engineering, NN), (electrical, JJ), (engineering, NN), (bs, NN), (degree, NN), (offer, VBP), (additionally, RB), (college, NN), (offer, NN), (fiveyear, JJ), (dual, JJ), (degree, NN), (program, NN), (college, NN), (art, RB), (letter, NN), (business, NN), (award, VBD), (additional, JJ), (ba, NN), (master, NN), (business, NN), (administration, NN), (mba, NN), (degree, VBP), (respectively, RB)] | [(bs, NNS), (level, VBP), (degree, JJ), (offer, NN), (college, NN), (engineering, NN), (notre, NN), (dame, NN)] | [] | [(college, NN, O), (engineering, NN, O), (establish, VB, O), (1920, CD, O), (early, JJ, O), (course, NN, O), (civil, JJ, O), (mechanical, JJ, O), (engineering, NN, O), (college, NN, O), (science, NN, O), (1870s, CD, O), (today, NN, O), (college, NN, O), (house, NN, O), (fitzpatrick, JJ, O), (cushing, VBG, O), (stinsonremick, JJ, O), (hall, NN, O), (engineering, NN, O), (include, VBP, O), (department, NN, O), (study, NN, O), (aerospace, VBP, O), (mechanical, JJ, O), (engineering, NN, O), (chemical, NN, O), (biomolecular, JJ, O), (engineering, NN, O), (civil, JJ, O), (engineering, NN, O), (geological, JJ, O), (science, NN, O), (computer, NN, O), (science, NN, O), (engineering, NN, O), (electrical, JJ, O), (engineering, NN, O), (bs, NN, O), (degree, NN, O), (offer, VBP, O), (additionally, RB, O), (college, NN, O), (offer, NN, O), (fiveyear, JJ, O), (dual, JJ, O), (degree, NN, O), (program, NN, O), (college, NN, O), (art, RB, O), (letter, NN, O), (business, NN, O), (award, VBD, O), (additional, JJ, O), (ba, NN, O), (master, NN, O), (business, NN, O), (administration, NN, O), (mba, NN, O), (degree, VBP, O), (respectively, RB, O)] | [(bs, NNS, O), (level, VBP, O), (degree, JJ, O), (offer, NN, O), (college, NN, O), (engineering, NN, O), (notre, NN, O), (dame, NN, O)] | [] | |
| 16 | college engineering establish 1920 early course civil mechanical engineering college science 1870s today college house fitzpatrick cushing stinsonremick hall engineering include department study aerospace mechanical engineering chemical biomolecular engineering civil engineering geological science computer science engineering electrical engineering bs degree offer additionally college offer fiveyear dual degree program college art letter business award additional ba master business administration mba degree respectively | year college engineering notre dame form | 1920 | [college, engineering, establish, 1920, early, course, civil, mechanical, engineering, college, science, 1870s, today, college, house, fitzpatrick, cushing, stinsonremick, hall, engineering, include, department, study, aerospace, mechanical, engineering, chemical, biomolecular, engineering, civil, engineering, geological, science, computer, science, engineering, electrical, engineering, bs, degree, offer, additionally, college, offer, fiveyear, dual, degree, program, college, art, letter, business, award, additional, ba, master, business, administration, mba, degree, respectively] | [year, college, engineering, notre, dame, form] | [1920] | [(college, NN), (engineering, NN), (establish, VB), (1920, CD), (early, JJ), (course, NN), (civil, JJ), (mechanical, JJ), (engineering, NN), (college, NN), (science, NN), (1870s, CD), (today, NN), (college, NN), (house, NN), (fitzpatrick, JJ), (cushing, VBG), (stinsonremick, JJ), (hall, NN), (engineering, NN), (include, VBP), (department, NN), (study, NN), (aerospace, VBP), (mechanical, JJ), (engineering, NN), (chemical, NN), (biomolecular, JJ), (engineering, NN), (civil, JJ), (engineering, NN), (geological, JJ), (science, NN), (computer, NN), (science, NN), (engineering, NN), (electrical, JJ), (engineering, NN), (bs, NN), (degree, NN), (offer, VBP), (additionally, RB), (college, NN), (offer, NN), (fiveyear, JJ), (dual, JJ), (degree, NN), (program, NN), (college, NN), (art, RB), (letter, NN), (business, NN), (award, VBD), (additional, JJ), (ba, NN), (master, NN), (business, NN), (administration, NN), (mba, NN), (degree, VBP), (respectively, RB)] | [(year, NN), (college, NN), (engineering, NN), (notre, JJ), (dame, NN), (form, NN)] | [(1920, CD)] | [(college, NN, O), (engineering, NN, O), (establish, VB, O), (1920, CD, O), (early, JJ, O), (course, NN, O), (civil, JJ, O), (mechanical, JJ, O), (engineering, NN, O), (college, NN, O), (science, NN, O), (1870s, CD, O), (today, NN, O), (college, NN, O), (house, NN, O), (fitzpatrick, JJ, O), (cushing, VBG, O), (stinsonremick, JJ, O), (hall, NN, O), (engineering, NN, O), (include, VBP, O), (department, NN, O), (study, NN, O), (aerospace, VBP, O), (mechanical, JJ, O), (engineering, NN, O), (chemical, NN, O), (biomolecular, JJ, O), (engineering, NN, O), (civil, JJ, O), (engineering, NN, O), (geological, JJ, O), (science, NN, O), (computer, NN, O), (science, NN, O), (engineering, NN, O), (electrical, JJ, O), (engineering, NN, O), (bs, NN, O), (degree, NN, O), (offer, VBP, O), (additionally, RB, O), (college, NN, O), (offer, NN, O), (fiveyear, JJ, O), (dual, JJ, O), (degree, NN, O), (program, NN, O), (college, NN, O), (art, RB, O), (letter, NN, O), (business, NN, O), (award, VBD, O), (additional, JJ, O), (ba, NN, O), (master, NN, O), (business, NN, O), (administration, NN, O), (mba, NN, O), (degree, VBP, O), (respectively, RB, O)] | [(year, NN, O), (college, NN, O), (engineering, NN, O), (notre, JJ, O), (dame, NN, O), (form, NN, O)] | [(1920, CD, O)] |
| 17 | college engineering establish 1920 early course civil mechanical engineering college science 1870s today college house fitzpatrick cushing stinsonremick hall engineering include department study aerospace mechanical engineering chemical biomolecular engineering civil engineering geological science computer science engineering electrical engineering bs degree offer additionally college offer fiveyear dual degree program college art letter business award additional ba master business administration mba degree respectively | creation college engineering similar study carry notre dame college | college science | [college, engineering, establish, 1920, early, course, civil, mechanical, engineering, college, science, 1870s, today, college, house, fitzpatrick, cushing, stinsonremick, hall, engineering, include, department, study, aerospace, mechanical, engineering, chemical, biomolecular, engineering, civil, engineering, geological, science, computer, science, engineering, electrical, engineering, bs, degree, offer, additionally, college, offer, fiveyear, dual, degree, program, college, art, letter, business, award, additional, ba, master, business, administration, mba, degree, respectively] | [creation, college, engineering, similar, study, carry, notre, dame, college] | [college, science] | [(college, NN), (engineering, NN), (establish, VB), (1920, CD), (early, JJ), (course, NN), (civil, JJ), (mechanical, JJ), (engineering, NN), (college, NN), (science, NN), (1870s, CD), (today, NN), (college, NN), (house, NN), (fitzpatrick, JJ), (cushing, VBG), (stinsonremick, JJ), (hall, NN), (engineering, NN), (include, VBP), (department, NN), (study, NN), (aerospace, VBP), (mechanical, JJ), (engineering, NN), (chemical, NN), (biomolecular, JJ), (engineering, NN), (civil, JJ), (engineering, NN), (geological, JJ), (science, NN), (computer, NN), (science, NN), (engineering, NN), (electrical, JJ), (engineering, NN), (bs, NN), (degree, NN), (offer, VBP), (additionally, RB), (college, NN), (offer, NN), (fiveyear, JJ), (dual, JJ), (degree, NN), (program, NN), (college, NN), (art, RB), (letter, NN), (business, NN), (award, VBD), (additional, JJ), (ba, NN), (master, NN), (business, NN), (administration, NN), (mba, NN), (degree, VBP), (respectively, RB)] | [(creation, NN), (college, NN), (engineering, NN), (similar, JJ), (study, NN), (carry, NN), (notre, JJ), (dame, NN), (college, NN)] | [(college, NN), (science, NN)] | [(college, NN, O), (engineering, NN, O), (establish, VB, O), (1920, CD, O), (early, JJ, O), (course, NN, O), (civil, JJ, O), (mechanical, JJ, O), (engineering, NN, O), (college, NN, O), (science, NN, O), (1870s, CD, O), (today, NN, O), (college, NN, O), (house, NN, O), (fitzpatrick, JJ, O), (cushing, VBG, O), (stinsonremick, JJ, O), (hall, NN, O), (engineering, NN, O), (include, VBP, O), (department, NN, O), (study, NN, O), (aerospace, VBP, O), (mechanical, JJ, O), (engineering, NN, O), (chemical, NN, O), (biomolecular, JJ, O), (engineering, NN, O), (civil, JJ, O), (engineering, NN, O), (geological, JJ, O), (science, NN, O), (computer, NN, O), (science, NN, O), (engineering, NN, O), (electrical, JJ, O), (engineering, NN, O), (bs, NN, O), (degree, NN, O), (offer, VBP, O), (additionally, RB, O), (college, NN, O), (offer, NN, O), (fiveyear, JJ, O), (dual, JJ, O), (degree, NN, O), (program, NN, O), (college, NN, O), (art, RB, O), (letter, NN, O), (business, NN, O), (award, VBD, O), (additional, JJ, O), (ba, NN, O), (master, NN, O), (business, NN, O), (administration, NN, O), (mba, NN, O), (degree, VBP, O), (respectively, RB, O)] | [(creation, NN, O), (college, NN, O), (engineering, NN, O), (similar, JJ, O), (study, NN, O), (carry, NN, O), (notre, JJ, O), (dame, NN, O), (college, NN, O)] | [(college, NN, O), (science, NN, O)] |
| 18 | college engineering establish 1920 early course civil mechanical engineering college science 1870s today college house fitzpatrick cushing stinsonremick hall engineering include department study aerospace mechanical engineering chemical biomolecular engineering civil engineering geological science computer science engineering electrical engineering bs degree offer additionally college offer fiveyear dual degree program college art letter business award additional ba master business administration mba degree respectively | department stinsonremick hall engineering | [college, engineering, establish, 1920, early, course, civil, mechanical, engineering, college, science, 1870s, today, college, house, fitzpatrick, cushing, stinsonremick, hall, engineering, include, department, study, aerospace, mechanical, engineering, chemical, biomolecular, engineering, civil, engineering, geological, science, computer, science, engineering, electrical, engineering, bs, degree, offer, additionally, college, offer, fiveyear, dual, degree, program, college, art, letter, business, award, additional, ba, master, business, administration, mba, degree, respectively] | [department, stinsonremick, hall, engineering] | [] | [(college, NN), (engineering, NN), (establish, VB), (1920, CD), (early, JJ), (course, NN), (civil, JJ), (mechanical, JJ), (engineering, NN), (college, NN), (science, NN), (1870s, CD), (today, NN), (college, NN), (house, NN), (fitzpatrick, JJ), (cushing, VBG), (stinsonremick, JJ), (hall, NN), (engineering, NN), (include, VBP), (department, NN), (study, NN), (aerospace, VBP), (mechanical, JJ), (engineering, NN), (chemical, NN), (biomolecular, JJ), (engineering, NN), (civil, JJ), (engineering, NN), (geological, JJ), (science, NN), (computer, NN), (science, NN), (engineering, NN), (electrical, JJ), (engineering, NN), (bs, NN), (degree, NN), (offer, VBP), (additionally, RB), (college, NN), (offer, NN), (fiveyear, JJ), (dual, JJ), (degree, NN), (program, NN), (college, NN), (art, RB), (letter, NN), (business, NN), (award, VBD), (additional, JJ), (ba, NN), (master, NN), (business, NN), (administration, NN), (mba, NN), (degree, VBP), (respectively, RB)] | [(department, NN), (stinsonremick, NN), (hall, NN), (engineering, NN)] | [] | [(college, NN, O), (engineering, NN, O), (establish, VB, O), (1920, CD, O), (early, JJ, O), (course, NN, O), (civil, JJ, O), (mechanical, JJ, O), (engineering, NN, O), (college, NN, O), (science, NN, O), (1870s, CD, O), (today, NN, O), (college, NN, O), (house, NN, O), (fitzpatrick, JJ, O), (cushing, VBG, O), (stinsonremick, JJ, O), (hall, NN, O), (engineering, NN, O), (include, VBP, O), (department, NN, O), (study, NN, O), (aerospace, VBP, O), (mechanical, JJ, O), (engineering, NN, O), (chemical, NN, O), (biomolecular, JJ, O), (engineering, NN, O), (civil, JJ, O), (engineering, NN, O), (geological, JJ, O), (science, NN, O), (computer, NN, O), (science, NN, O), (engineering, NN, O), (electrical, JJ, O), (engineering, NN, O), (bs, NN, O), (degree, NN, O), (offer, VBP, O), (additionally, RB, O), (college, NN, O), (offer, NN, O), (fiveyear, JJ, O), (dual, JJ, O), (degree, NN, O), (program, NN, O), (college, NN, O), (art, RB, O), (letter, NN, O), (business, NN, O), (award, VBD, O), (additional, JJ, O), (ba, NN, O), (master, NN, O), (business, NN, O), (administration, NN, O), (mba, NN, O), (degree, VBP, O), (respectively, RB, O)] | [(department, NN, O), (stinsonremick, NN, O), (hall, NN, O), (engineering, NN, O)] | [] | |
| 19 | college engineering establish 1920 early course civil mechanical engineering college science 1870s today college house fitzpatrick cushing stinsonremick hall engineering include department study aerospace mechanical engineering chemical biomolecular engineering civil engineering geological science computer science engineering electrical engineering bs degree offer additionally college offer fiveyear dual degree program college art letter business award additional ba master business administration mba degree respectively | college science begin offer civil engineering course begin time notre dame | 1870s | [college, engineering, establish, 1920, early, course, civil, mechanical, engineering, college, science, 1870s, today, college, house, fitzpatrick, cushing, stinsonremick, hall, engineering, include, department, study, aerospace, mechanical, engineering, chemical, biomolecular, engineering, civil, engineering, geological, science, computer, science, engineering, electrical, engineering, bs, degree, offer, additionally, college, offer, fiveyear, dual, degree, program, college, art, letter, business, award, additional, ba, master, business, administration, mba, degree, respectively] | [college, science, begin, offer, civil, engineering, course, begin, time, notre, dame] | [1870s] | [(college, NN), (engineering, NN), (establish, VB), (1920, CD), (early, JJ), (course, NN), (civil, JJ), (mechanical, JJ), (engineering, NN), (college, NN), (science, NN), (1870s, CD), (today, NN), (college, NN), (house, NN), (fitzpatrick, JJ), (cushing, VBG), (stinsonremick, JJ), (hall, NN), (engineering, NN), (include, VBP), (department, NN), (study, NN), (aerospace, VBP), (mechanical, JJ), (engineering, NN), (chemical, NN), (biomolecular, JJ), (engineering, NN), (civil, JJ), (engineering, NN), (geological, JJ), (science, NN), (computer, NN), (science, NN), (engineering, NN), (electrical, JJ), (engineering, NN), (bs, NN), (degree, NN), (offer, VBP), (additionally, RB), (college, NN), (offer, NN), (fiveyear, JJ), (dual, JJ), (degree, NN), (program, NN), (college, NN), (art, RB), (letter, NN), (business, NN), (award, VBD), (additional, JJ), (ba, NN), (master, NN), (business, NN), (administration, NN), (mba, NN), (degree, VBP), (respectively, RB)] | [(college, NN), (science, NN), (begin, NN), (offer, VBP), (civil, JJ), (engineering, NN), (course, NN), (begin, JJ), (time, NN), (notre, JJ), (dame, NN)] | [(1870s, NNS)] | [(college, NN, O), (engineering, NN, O), (establish, VB, O), (1920, CD, O), (early, JJ, O), (course, NN, O), (civil, JJ, O), (mechanical, JJ, O), (engineering, NN, O), (college, NN, O), (science, NN, O), (1870s, CD, O), (today, NN, O), (college, NN, O), (house, NN, O), (fitzpatrick, JJ, O), (cushing, VBG, O), (stinsonremick, JJ, O), (hall, NN, O), (engineering, NN, O), (include, VBP, O), (department, NN, O), (study, NN, O), (aerospace, VBP, O), (mechanical, JJ, O), (engineering, NN, O), (chemical, NN, O), (biomolecular, JJ, O), (engineering, NN, O), (civil, JJ, O), (engineering, NN, O), (geological, JJ, O), (science, NN, O), (computer, NN, O), (science, NN, O), (engineering, NN, O), (electrical, JJ, O), (engineering, NN, O), (bs, NN, O), (degree, NN, O), (offer, VBP, O), (additionally, RB, O), (college, NN, O), (offer, NN, O), (fiveyear, JJ, O), (dual, JJ, O), (degree, NN, O), (program, NN, O), (college, NN, O), (art, RB, O), (letter, NN, O), (business, NN, O), (award, VBD, O), (additional, JJ, O), (ba, NN, O), (master, NN, O), (business, NN, O), (administration, NN, O), (mba, NN, O), (degree, VBP, O), (respectively, RB, O)] | [(college, NN, O), (science, NN, O), (begin, NN, O), (offer, VBP, O), (civil, JJ, O), (engineering, NN, O), (course, NN, O), (begin, JJ, O), (time, NN, O), (notre, JJ, O), (dame, NN, O)] | [(1870s, NNS, O)] |
| 20 | notre dames undergraduate student undergraduate college school year study program year study program establish 1962 guide incoming freshman year school declare major student give academic advisor program help choose class exposure major interested program include learn resource center provide time management collaborative learning subject tutoring program recognize previously news world report outstanding | entity provide help management time new student notre dame | learn resource center | [notre, dames, undergraduate, student, undergraduate, college, school, year, study, program, year, study, program, establish, 1962, guide, incoming, freshman, year, school, declare, major, student, give, academic, advisor, program, help, choose, class, exposure, major, interested, program, include, learn, resource, center, provide, time, management, collaborative, learning, subject, tutoring, program, recognize, previously, news, world, report, outstanding] | [entity, provide, help, management, time, new, student, notre, dame] | [learn, resource, center] | [(notre, JJ), (dames, NNS), (undergraduate, JJ), (student, NN), (undergraduate, JJ), (college, NN), (school, NN), (year, NN), (study, NN), (program, NN), (year, NN), (study, NN), (program, NN), (establish, VB), (1962, CD), (guide, NN), (incoming, VBG), (freshman, JJ), (year, NN), (school, NN), (declare, NN), (major, JJ), (student, NN), (give, VBP), (academic, JJ), (advisor, NN), (program, NN), (help, NN), (choose, VB), (class, NN), (exposure, NN), (major, JJ), (interested, JJ), (program, NN), (include, VBP), (learn, JJ), (resource, NN), (center, NN), (provide, VBP), (time, NN), (management, NN), (collaborative, JJ), (learning, NN), (subject, JJ), (tutoring, VBG), (program, NN), (recognize, NN), (previously, RB), (news, NN), (world, NN), (report, NN), (outstanding, JJ)] | [(entity, NN), (provide, VB), (help, NN), (management, NN), (time, NN), (new, JJ), (student, NN), (notre, NN), (dame, NN)] | [(learn, JJ), (resource, NN), (center, NN)] | [(notre, JJ, O), (dames, NNS, O), (undergraduate, JJ, O), (student, NN, O), (undergraduate, JJ, O), (college, NN, O), (school, NN, O), (year, NN, O), (study, NN, O), (program, NN, O), (year, NN, O), (study, NN, O), (program, NN, O), (establish, VB, O), (1962, CD, O), (guide, NN, O), (incoming, VBG, O), (freshman, JJ, O), (year, NN, O), (school, NN, O), (declare, NN, O), (major, JJ, O), (student, NN, O), (give, VBP, O), (academic, JJ, O), (advisor, NN, O), (program, NN, O), (help, NN, O), (choose, VB, O), (class, NN, O), (exposure, NN, O), (major, JJ, O), (interested, JJ, O), (program, NN, O), (include, VBP, O), (learn, JJ, O), (resource, NN, O), (center, NN, O), (provide, VBP, O), (time, NN, O), (management, NN, O), (collaborative, JJ, O), (learning, NN, O), (subject, JJ, O), (tutoring, VBG, O), (program, NN, O), (recognize, NN, O), (previously, RB, O), (news, NN, O), (world, NN, O), (report, NN, O), (outstanding, JJ, O)] | [(entity, NN, O), (provide, VB, O), (help, NN, O), (management, NN, O), (time, NN, O), (new, JJ, O), (student, NN, O), (notre, NN, O), (dame, NN, O)] | [(learn, JJ, O), (resource, NN, O), (center, NN, O)] |
| 21 | notre dames undergraduate student undergraduate college school year study program year study program establish 1962 guide incoming freshman year school declare major student give academic advisor program help choose class exposure major interested program include learn resource center provide time management collaborative learning subject tutoring program recognize previously news world report outstanding | college undergraduate notre dame | [notre, dames, undergraduate, student, undergraduate, college, school, year, study, program, year, study, program, establish, 1962, guide, incoming, freshman, year, school, declare, major, student, give, academic, advisor, program, help, choose, class, exposure, major, interested, program, include, learn, resource, center, provide, time, management, collaborative, learning, subject, tutoring, program, recognize, previously, news, world, report, outstanding] | [college, undergraduate, notre, dame] | [] | [(notre, JJ), (dames, NNS), (undergraduate, JJ), (student, NN), (undergraduate, JJ), (college, NN), (school, NN), (year, NN), (study, NN), (program, NN), (year, NN), (study, NN), (program, NN), (establish, VB), (1962, CD), (guide, NN), (incoming, VBG), (freshman, JJ), (year, NN), (school, NN), (declare, NN), (major, JJ), (student, NN), (give, VBP), (academic, JJ), (advisor, NN), (program, NN), (help, NN), (choose, VB), (class, NN), (exposure, NN), (major, JJ), (interested, JJ), (program, NN), (include, VBP), (learn, JJ), (resource, NN), (center, NN), (provide, VBP), (time, NN), (management, NN), (collaborative, JJ), (learning, NN), (subject, JJ), (tutoring, VBG), (program, NN), (recognize, NN), (previously, RB), (news, NN), (world, NN), (report, NN), (outstanding, JJ)] | [(college, NN), (undergraduate, NN), (notre, NN), (dame, NN)] | [] | [(notre, JJ, O), (dames, NNS, O), (undergraduate, JJ, O), (student, NN, O), (undergraduate, JJ, O), (college, NN, O), (school, NN, O), (year, NN, O), (study, NN, O), (program, NN, O), (year, NN, O), (study, NN, O), (program, NN, O), (establish, VB, O), (1962, CD, O), (guide, NN, O), (incoming, VBG, O), (freshman, JJ, O), (year, NN, O), (school, NN, O), (declare, NN, O), (major, JJ, O), (student, NN, O), (give, VBP, O), (academic, JJ, O), (advisor, NN, O), (program, NN, O), (help, NN, O), (choose, VB, O), (class, NN, O), (exposure, NN, O), (major, JJ, O), (interested, JJ, O), (program, NN, O), (include, VBP, O), (learn, JJ, O), (resource, NN, O), (center, NN, O), (provide, VBP, O), (time, NN, O), (management, NN, O), (collaborative, JJ, O), (learning, NN, O), (subject, JJ, O), (tutoring, VBG, O), (program, NN, O), (recognize, NN, O), (previously, RB, O), (news, NN, O), (world, NN, O), (report, NN, O), (outstanding, JJ, O)] | [(college, NN, O), (undergraduate, NN, O), (notre, NN, O), (dame, NN, O)] | [] | |
| 22 | notre dames undergraduate student undergraduate college school year study program year study program establish 1962 guide incoming freshman year school declare major student give academic advisor program help choose class exposure major interested program include learn resource center provide time management collaborative learning subject tutoring program recognize previously news world report outstanding | create notre dame 1962 assist year student | year study program | [notre, dames, undergraduate, student, undergraduate, college, school, year, study, program, year, study, program, establish, 1962, guide, incoming, freshman, year, school, declare, major, student, give, academic, advisor, program, help, choose, class, exposure, major, interested, program, include, learn, resource, center, provide, time, management, collaborative, learning, subject, tutoring, program, recognize, previously, news, world, report, outstanding] | [create, notre, dame, 1962, assist, year, student] | [year, study, program] | [(notre, JJ), (dames, NNS), (undergraduate, JJ), (student, NN), (undergraduate, JJ), (college, NN), (school, NN), (year, NN), (study, NN), (program, NN), (year, NN), (study, NN), (program, NN), (establish, VB), (1962, CD), (guide, NN), (incoming, VBG), (freshman, JJ), (year, NN), (school, NN), (declare, NN), (major, JJ), (student, NN), (give, VBP), (academic, JJ), (advisor, NN), (program, NN), (help, NN), (choose, VB), (class, NN), (exposure, NN), (major, JJ), (interested, JJ), (program, NN), (include, VBP), (learn, JJ), (resource, NN), (center, NN), (provide, VBP), (time, NN), (management, NN), (collaborative, JJ), (learning, NN), (subject, JJ), (tutoring, VBG), (program, NN), (recognize, NN), (previously, RB), (news, NN), (world, NN), (report, NN), (outstanding, JJ)] | [(create, NN), (notre, NNS), (dame, JJ), (1962, CD), (assist, JJ), (year, NN), (student, NN)] | [(year, NN), (study, NN), (program, NN)] | [(notre, JJ, O), (dames, NNS, O), (undergraduate, JJ, O), (student, NN, O), (undergraduate, JJ, O), (college, NN, O), (school, NN, O), (year, NN, O), (study, NN, O), (program, NN, O), (year, NN, O), (study, NN, O), (program, NN, O), (establish, VB, O), (1962, CD, O), (guide, NN, O), (incoming, VBG, O), (freshman, JJ, O), (year, NN, O), (school, NN, O), (declare, NN, O), (major, JJ, O), (student, NN, O), (give, VBP, O), (academic, JJ, O), (advisor, NN, O), (program, NN, O), (help, NN, O), (choose, VB, O), (class, NN, O), (exposure, NN, O), (major, JJ, O), (interested, JJ, O), (program, NN, O), (include, VBP, O), (learn, JJ, O), (resource, NN, O), (center, NN, O), (provide, VBP, O), (time, NN, O), (management, NN, O), (collaborative, JJ, O), (learning, NN, O), (subject, JJ, O), (tutoring, VBG, O), (program, NN, O), (recognize, NN, O), (previously, RB, O), (news, NN, O), (world, NN, O), (report, NN, O), (outstanding, JJ, O)] | [(create, NN, O), (notre, NNS, O), (dame, JJ, O), (1962, CD, O), (assist, JJ, O), (year, NN, O), (student, NN, O)] | [(year, NN, O), (study, NN, O), (program, NN, O)] |
| 23 | notre dames undergraduate student undergraduate college school year study program year study program establish 1962 guide incoming freshman year school declare major student give academic advisor program help choose class exposure major interested program include learn resource center provide time management collaborative learning subject tutoring program recognize previously news world report outstanding | organization declare year study program notre dame outstanding | news world report | [notre, dames, undergraduate, student, undergraduate, college, school, year, study, program, year, study, program, establish, 1962, guide, incoming, freshman, year, school, declare, major, student, give, academic, advisor, program, help, choose, class, exposure, major, interested, program, include, learn, resource, center, provide, time, management, collaborative, learning, subject, tutoring, program, recognize, previously, news, world, report, outstanding] | [organization, declare, year, study, program, notre, dame, outstanding] | [news, world, report] | [(notre, JJ), (dames, NNS), (undergraduate, JJ), (student, NN), (undergraduate, JJ), (college, NN), (school, NN), (year, NN), (study, NN), (program, NN), (year, NN), (study, NN), (program, NN), (establish, VB), (1962, CD), (guide, NN), (incoming, VBG), (freshman, JJ), (year, NN), (school, NN), (declare, NN), (major, JJ), (student, NN), (give, VBP), (academic, JJ), (advisor, NN), (program, NN), (help, NN), (choose, VB), (class, NN), (exposure, NN), (major, JJ), (interested, JJ), (program, NN), (include, VBP), (learn, JJ), (resource, NN), (center, NN), (provide, VBP), (time, NN), (management, NN), (collaborative, JJ), (learning, NN), (subject, JJ), (tutoring, VBG), (program, NN), (recognize, NN), (previously, RB), (news, NN), (world, NN), (report, NN), (outstanding, JJ)] | [(organization, NN), (declare, NN), (year, NN), (study, NN), (program, NN), (notre, NNS), (dame, VBP), (outstanding, JJ)] | [(news, NN), (world, NN), (report, NN)] | [(notre, JJ, O), (dames, NNS, O), (undergraduate, JJ, O), (student, NN, O), (undergraduate, JJ, O), (college, NN, O), (school, NN, O), (year, NN, O), (study, NN, O), (program, NN, O), (year, NN, O), (study, NN, O), (program, NN, O), (establish, VB, O), (1962, CD, O), (guide, NN, O), (incoming, VBG, O), (freshman, JJ, O), (year, NN, O), (school, NN, O), (declare, NN, O), (major, JJ, O), (student, NN, O), (give, VBP, O), (academic, JJ, O), (advisor, NN, O), (program, NN, O), (help, NN, O), (choose, VB, O), (class, NN, O), (exposure, NN, O), (major, JJ, O), (interested, JJ, O), (program, NN, O), (include, VBP, O), (learn, JJ, O), (resource, NN, O), (center, NN, O), (provide, VBP, O), (time, NN, O), (management, NN, O), (collaborative, JJ, O), (learning, NN, O), (subject, JJ, O), (tutoring, VBG, O), (program, NN, O), (recognize, NN, O), (previously, RB, O), (news, NN, O), (world, NN, O), (report, NN, O), (outstanding, JJ, O)] | [(organization, NN, O), (declare, NN, O), (year, NN, O), (study, NN, O), (program, NN, O), (notre, NNS, O), (dame, VBP, O), (outstanding, JJ, O)] | [(news, NN, O), (world, NN, O), (report, NN, O)] |
| 24 | university offer graduate degree form master arts ma 18541855 academic year program expand include master law llm master civil engineering early stage growth formal graduate school education develop thesis require receive degree change 1924 formal requirement develop graduate degree include offer doctorate phd degree today college offer graduate education department college art letter offer phd program professional master divinity mdiv program exist department college science offer phd program department preprofessional study school architecture offer master architecture department college engineering offer phd program college business offer multiple professional program include mba master science accountancy program operate facility chicago cincinnati executive mba program additionally alliance catholic education program offer master education program student study university summer teach catholic elementary schools middle school high school southern united states school year | granting doctorate degree occur year notre dame | 1924 | [university, offer, graduate, degree, form, master, arts, ma, 18541855, academic, year, program, expand, include, master, law, llm, master, civil, engineering, early, stage, growth, formal, graduate, school, education, develop, thesis, require, receive, degree, change, 1924, formal, requirement, develop, graduate, degree, include, offer, doctorate, phd, degree, today, college, offer, graduate, education, department, college, art, letter, offer, phd, program, professional, master, divinity, mdiv, program, exist, department, college, science, offer, phd, program, department, preprofessional, study, school, architecture, offer, master, architecture, department, college, engineering, offer, phd, program, college, business, offer, multiple, professional, program, include, mba, master, science, accountancy, program, operate, facility, chicago, cincinnati, executive, mba, ...] | [granting, doctorate, degree, occur, year, notre, dame] | [1924] | [(university, NN), (offer, NN), (graduate, NN), (degree, JJ), (form, NN), (master, NN), (arts, NNS), (ma, VBD), (18541855, CD), (academic, JJ), (year, NN), (program, NN), (expand, VBP), (include, VBP), (master, NN), (law, NN), (llm, NN), (master, NN), (civil, JJ), (engineering, NN), (early, JJ), (stage, NN), (growth, NN), (formal, JJ), (graduate, NN), (school, NN), (education, NN), (develop, VB), (thesis, NN), (require, NN), (receive, JJ), (degree, NN), (change, NN), (1924, CD), (formal, JJ), (requirement, NN), (develop, VB), (graduate, JJ), (degree, NN), (include, VBP), (offer, NN), (doctorate, NN), (phd, NN), (degree, NN), (today, NN), (college, NN), (offer, NN), (graduate, JJ), (education, NN), (department, NN), (college, NN), (art, VBP), (letter, NN), (offer, NN), (phd, NN), (program, NN), (professional, JJ), (master, NN), (divinity, NN), (mdiv, JJ), (program, NN), (exist, VBP), (department, NN), (college, NN), (science, NN), (offer, NN), (phd, JJ), (program, NN), (department, NN), (preprofessional, NN), (study, NN), (school, NN), (architecture, NN), (offer, VBP), (master, NN), (architecture, NN), (department, NN), (college, NN), (engineering, NN), (offer, NN), (phd, JJ), (program, NN), (college, NN), (business, NN), (offer, VBP), (multiple, JJ), (professional, JJ), (program, NN), (include, VBP), (mba, JJ), (master, NN), (science, NN), (accountancy, NN), (program, NN), (operate, VBP), (facility, NN), (chicago, NN), (cincinnati, NN), (executive, NN), (mba, NN), ...] | [(granting, VBG), (doctorate, NN), (degree, NN), (occur, IN), (year, NN), (notre, RB), (dame, NN)] | [(1924, CD)] | [(university, NN, O), (offer, NN, O), (graduate, NN, O), (degree, JJ, O), (form, NN, O), (master, NN, O), (arts, NNS, O), (ma, VBD, O), (18541855, CD, O), (academic, JJ, O), (year, NN, O), (program, NN, O), (expand, VBP, O), (include, VBP, O), (master, NN, O), (law, NN, O), (llm, NN, O), (master, NN, O), (civil, JJ, O), (engineering, NN, O), (early, JJ, O), (stage, NN, O), (growth, NN, O), (formal, JJ, O), (graduate, NN, O), (school, NN, O), (education, NN, O), (develop, VB, O), (thesis, NN, O), (require, NN, O), (receive, JJ, O), (degree, NN, O), (change, NN, O), (1924, CD, O), (formal, JJ, O), (requirement, NN, O), (develop, VB, O), (graduate, JJ, O), (degree, NN, O), (include, VBP, O), (offer, NN, O), (doctorate, NN, O), (phd, NN, O), (degree, NN, O), (today, NN, O), (college, NN, O), (offer, NN, O), (graduate, JJ, O), (education, NN, O), (department, NN, O), (college, NN, O), (art, VBP, O), (letter, NN, O), (offer, NN, O), (phd, NN, O), (program, NN, O), (professional, JJ, O), (master, NN, O), (divinity, NN, O), (mdiv, JJ, O), (program, NN, O), (exist, VBP, O), (department, NN, O), (college, NN, O), (science, NN, O), (offer, NN, O), (phd, JJ, O), (program, NN, O), (department, NN, O), (preprofessional, NN, O), (study, NN, O), (school, NN, O), (architecture, NN, O), (offer, VBP, O), (master, NN, O), (architecture, NN, O), (department, NN, O), (college, NN, O), (engineering, NN, O), (offer, NN, O), (phd, JJ, O), (program, NN, O), (college, NN, O), (business, NN, O), (offer, VBP, O), (multiple, JJ, O), (professional, JJ, O), (program, NN, O), (include, VBP, O), (mba, JJ, O), (master, NN, O), (science, NN, O), (accountancy, NN, O), (program, NN, O), (operate, VBP, O), (facility, NN, O), (chicago, NN, O), (cincinnati, NN, O), (executive, NN, O), (mba, NN, O), ...] | [(granting, VBG, O), (doctorate, NN, O), (degree, NN, O), (occur, IN, O), (year, NN, O), (notre, RB, O), (dame, NN, O)] | [(1924, CD, O)] |
| 25 | university offer graduate degree form master arts ma 18541855 academic year program expand include master law llm master civil engineering early stage growth formal graduate school education develop thesis require receive degree change 1924 formal requirement develop graduate degree include offer doctorate phd degree today college offer graduate education department college art letter offer phd program professional master divinity mdiv program exist department college science offer phd program department preprofessional study school architecture offer master architecture department college engineering offer phd program college business offer multiple professional program include mba master science accountancy program operate facility chicago cincinnati executive mba program additionally alliance catholic education program offer master education program student study university summer teach catholic elementary schools middle school high school southern united states school year | type degree mdiv | master divinity | [university, offer, graduate, degree, form, master, arts, ma, 18541855, academic, year, program, expand, include, master, law, llm, master, civil, engineering, early, stage, growth, formal, graduate, school, education, develop, thesis, require, receive, degree, change, 1924, formal, requirement, develop, graduate, degree, include, offer, doctorate, phd, degree, today, college, offer, graduate, education, department, college, art, letter, offer, phd, program, professional, master, divinity, mdiv, program, exist, department, college, science, offer, phd, program, department, preprofessional, study, school, architecture, offer, master, architecture, department, college, engineering, offer, phd, program, college, business, offer, multiple, professional, program, include, mba, master, science, accountancy, program, operate, facility, chicago, cincinnati, executive, mba, ...] | [type, degree, mdiv] | [master, divinity] | [(university, NN), (offer, NN), (graduate, NN), (degree, JJ), (form, NN), (master, NN), (arts, NNS), (ma, VBD), (18541855, CD), (academic, JJ), (year, NN), (program, NN), (expand, VBP), (include, VBP), (master, NN), (law, NN), (llm, NN), (master, NN), (civil, JJ), (engineering, NN), (early, JJ), (stage, NN), (growth, NN), (formal, JJ), (graduate, NN), (school, NN), (education, NN), (develop, VB), (thesis, NN), (require, NN), (receive, JJ), (degree, NN), (change, NN), (1924, CD), (formal, JJ), (requirement, NN), (develop, VB), (graduate, JJ), (degree, NN), (include, VBP), (offer, NN), (doctorate, NN), (phd, NN), (degree, NN), (today, NN), (college, NN), (offer, NN), (graduate, JJ), (education, NN), (department, NN), (college, NN), (art, VBP), (letter, NN), (offer, NN), (phd, NN), (program, NN), (professional, JJ), (master, NN), (divinity, NN), (mdiv, JJ), (program, NN), (exist, VBP), (department, NN), (college, NN), (science, NN), (offer, NN), (phd, JJ), (program, NN), (department, NN), (preprofessional, NN), (study, NN), (school, NN), (architecture, NN), (offer, VBP), (master, NN), (architecture, NN), (department, NN), (college, NN), (engineering, NN), (offer, NN), (phd, JJ), (program, NN), (college, NN), (business, NN), (offer, VBP), (multiple, JJ), (professional, JJ), (program, NN), (include, VBP), (mba, JJ), (master, NN), (science, NN), (accountancy, NN), (program, NN), (operate, VBP), (facility, NN), (chicago, NN), (cincinnati, NN), (executive, NN), (mba, NN), ...] | [(type, NN), (degree, NN), (mdiv, NN)] | [(master, NN), (divinity, NN)] | [(university, NN, O), (offer, NN, O), (graduate, NN, O), (degree, JJ, O), (form, NN, O), (master, NN, O), (arts, NNS, O), (ma, VBD, O), (18541855, CD, O), (academic, JJ, O), (year, NN, O), (program, NN, O), (expand, VBP, O), (include, VBP, O), (master, NN, O), (law, NN, O), (llm, NN, O), (master, NN, O), (civil, JJ, O), (engineering, NN, O), (early, JJ, O), (stage, NN, O), (growth, NN, O), (formal, JJ, O), (graduate, NN, O), (school, NN, O), (education, NN, O), (develop, VB, O), (thesis, NN, O), (require, NN, O), (receive, JJ, O), (degree, NN, O), (change, NN, O), (1924, CD, O), (formal, JJ, O), (requirement, NN, O), (develop, VB, O), (graduate, JJ, O), (degree, NN, O), (include, VBP, O), (offer, NN, O), (doctorate, NN, O), (phd, NN, O), (degree, NN, O), (today, NN, O), (college, NN, O), (offer, NN, O), (graduate, JJ, O), (education, NN, O), (department, NN, O), (college, NN, O), (art, VBP, O), (letter, NN, O), (offer, NN, O), (phd, NN, O), (program, NN, O), (professional, JJ, O), (master, NN, O), (divinity, NN, O), (mdiv, JJ, O), (program, NN, O), (exist, VBP, O), (department, NN, O), (college, NN, O), (science, NN, O), (offer, NN, O), (phd, JJ, O), (program, NN, O), (department, NN, O), (preprofessional, NN, O), (study, NN, O), (school, NN, O), (architecture, NN, O), (offer, VBP, O), (master, NN, O), (architecture, NN, O), (department, NN, O), (college, NN, O), (engineering, NN, O), (offer, NN, O), (phd, JJ, O), (program, NN, O), (college, NN, O), (business, NN, O), (offer, VBP, O), (multiple, JJ, O), (professional, JJ, O), (program, NN, O), (include, VBP, O), (mba, JJ, O), (master, NN, O), (science, NN, O), (accountancy, NN, O), (program, NN, O), (operate, VBP, O), (facility, NN, O), (chicago, NN, O), (cincinnati, NN, O), (executive, NN, O), (mba, NN, O), ...] | [(type, NN, O), (degree, NN, O), (mdiv, NN, O)] | [(master, NN, O), (divinity, NN, O)] |
| 26 | university offer graduate degree form master arts ma 18541855 academic year program expand include master law llm master civil engineering early stage growth formal graduate school education develop thesis require receive degree change 1924 formal requirement develop graduate degree include offer doctorate phd degree today college offer graduate education department college art letter offer phd program professional master divinity mdiv program exist department college science offer phd program department preprofessional study school architecture offer master architecture department college engineering offer phd program college business offer multiple professional program include mba master science accountancy program operate facility chicago cincinnati executive mba program additionally alliance catholic education program offer master education program student study university summer teach catholic elementary schools middle school high school southern united states school year | program notre dame offer master education degree | alliance catholic education | [university, offer, graduate, degree, form, master, arts, ma, 18541855, academic, year, program, expand, include, master, law, llm, master, civil, engineering, early, stage, growth, formal, graduate, school, education, develop, thesis, require, receive, degree, change, 1924, formal, requirement, develop, graduate, degree, include, offer, doctorate, phd, degree, today, college, offer, graduate, education, department, college, art, letter, offer, phd, program, professional, master, divinity, mdiv, program, exist, department, college, science, offer, phd, program, department, preprofessional, study, school, architecture, offer, master, architecture, department, college, engineering, offer, phd, program, college, business, offer, multiple, professional, program, include, mba, master, science, accountancy, program, operate, facility, chicago, cincinnati, executive, mba, ...] | [program, notre, dame, offer, master, education, degree] | [alliance, catholic, education] | [(university, NN), (offer, NN), (graduate, NN), (degree, JJ), (form, NN), (master, NN), (arts, NNS), (ma, VBD), (18541855, CD), (academic, JJ), (year, NN), (program, NN), (expand, VBP), (include, VBP), (master, NN), (law, NN), (llm, NN), (master, NN), (civil, JJ), (engineering, NN), (early, JJ), (stage, NN), (growth, NN), (formal, JJ), (graduate, NN), (school, NN), (education, NN), (develop, VB), (thesis, NN), (require, NN), (receive, JJ), (degree, NN), (change, NN), (1924, CD), (formal, JJ), (requirement, NN), (develop, VB), (graduate, JJ), (degree, NN), (include, VBP), (offer, NN), (doctorate, NN), (phd, NN), (degree, NN), (today, NN), (college, NN), (offer, NN), (graduate, JJ), (education, NN), (department, NN), (college, NN), (art, VBP), (letter, NN), (offer, NN), (phd, NN), (program, NN), (professional, JJ), (master, NN), (divinity, NN), (mdiv, JJ), (program, NN), (exist, VBP), (department, NN), (college, NN), (science, NN), (offer, NN), (phd, JJ), (program, NN), (department, NN), (preprofessional, NN), (study, NN), (school, NN), (architecture, NN), (offer, VBP), (master, NN), (architecture, NN), (department, NN), (college, NN), (engineering, NN), (offer, NN), (phd, JJ), (program, NN), (college, NN), (business, NN), (offer, VBP), (multiple, JJ), (professional, JJ), (program, NN), (include, VBP), (mba, JJ), (master, NN), (science, NN), (accountancy, NN), (program, NN), (operate, VBP), (facility, NN), (chicago, NN), (cincinnati, NN), (executive, NN), (mba, NN), ...] | [(program, NN), (notre, NNS), (dame, VBP), (offer, VBP), (master, NN), (education, NN), (degree, NN)] | [(alliance, NN), (catholic, JJ), (education, NN)] | [(university, NN, O), (offer, NN, O), (graduate, NN, O), (degree, JJ, O), (form, NN, O), (master, NN, O), (arts, NNS, O), (ma, VBD, O), (18541855, CD, O), (academic, JJ, O), (year, NN, O), (program, NN, O), (expand, VBP, O), (include, VBP, O), (master, NN, O), (law, NN, O), (llm, NN, O), (master, NN, O), (civil, JJ, O), (engineering, NN, O), (early, JJ, O), (stage, NN, O), (growth, NN, O), (formal, JJ, O), (graduate, NN, O), (school, NN, O), (education, NN, O), (develop, VB, O), (thesis, NN, O), (require, NN, O), (receive, JJ, O), (degree, NN, O), (change, NN, O), (1924, CD, O), (formal, JJ, O), (requirement, NN, O), (develop, VB, O), (graduate, JJ, O), (degree, NN, O), (include, VBP, O), (offer, NN, O), (doctorate, NN, O), (phd, NN, O), (degree, NN, O), (today, NN, O), (college, NN, O), (offer, NN, O), (graduate, JJ, O), (education, NN, O), (department, NN, O), (college, NN, O), (art, VBP, O), (letter, NN, O), (offer, NN, O), (phd, NN, O), (program, NN, O), (professional, JJ, O), (master, NN, O), (divinity, NN, O), (mdiv, JJ, O), (program, NN, O), (exist, VBP, O), (department, NN, O), (college, NN, O), (science, NN, O), (offer, NN, O), (phd, JJ, O), (program, NN, O), (department, NN, O), (preprofessional, NN, O), (study, NN, O), (school, NN, O), (architecture, NN, O), (offer, VBP, O), (master, NN, O), (architecture, NN, O), (department, NN, O), (college, NN, O), (engineering, NN, O), (offer, NN, O), (phd, JJ, O), (program, NN, O), (college, NN, O), (business, NN, O), (offer, VBP, O), (multiple, JJ, O), (professional, JJ, O), (program, NN, O), (include, VBP, O), (mba, JJ, O), (master, NN, O), (science, NN, O), (accountancy, NN, O), (program, NN, O), (operate, VBP, O), (facility, NN, O), (chicago, NN, O), (cincinnati, NN, O), (executive, NN, O), (mba, NN, O), ...] | [(program, NN, O), (notre, NNS, O), (dame, VBP, O), (offer, VBP, O), (master, NN, O), (education, NN, O), (degree, NN, O)] | [(alliance, NN, O), (catholic, JJ, O), (education, NN, O)] |
| 27 | university offer graduate degree form master arts ma 18541855 academic year program expand include master law llm master civil engineering early stage growth formal graduate school education develop thesis require receive degree change 1924 formal requirement develop graduate degree include offer doctorate phd degree today college offer graduate education department college art letter offer phd program professional master divinity mdiv program exist department college science offer phd program department preprofessional study school architecture offer master architecture department college engineering offer phd program college business offer multiple professional program include mba master science accountancy program operate facility chicago cincinnati executive mba program additionally alliance catholic education program offer master education program student study university summer teach catholic elementary schools middle school high school southern united states school year | year master art course offer notre dame | 1854 | [university, offer, graduate, degree, form, master, arts, ma, 18541855, academic, year, program, expand, include, master, law, llm, master, civil, engineering, early, stage, growth, formal, graduate, school, education, develop, thesis, require, receive, degree, change, 1924, formal, requirement, develop, graduate, degree, include, offer, doctorate, phd, degree, today, college, offer, graduate, education, department, college, art, letter, offer, phd, program, professional, master, divinity, mdiv, program, exist, department, college, science, offer, phd, program, department, preprofessional, study, school, architecture, offer, master, architecture, department, college, engineering, offer, phd, program, college, business, offer, multiple, professional, program, include, mba, master, science, accountancy, program, operate, facility, chicago, cincinnati, executive, mba, ...] | [year, master, art, course, offer, notre, dame] | [1854] | [(university, NN), (offer, NN), (graduate, NN), (degree, JJ), (form, NN), (master, NN), (arts, NNS), (ma, VBD), (18541855, CD), (academic, JJ), (year, NN), (program, NN), (expand, VBP), (include, VBP), (master, NN), (law, NN), (llm, NN), (master, NN), (civil, JJ), (engineering, NN), (early, JJ), (stage, NN), (growth, NN), (formal, JJ), (graduate, NN), (school, NN), (education, NN), (develop, VB), (thesis, NN), (require, NN), (receive, JJ), (degree, NN), (change, NN), (1924, CD), (formal, JJ), (requirement, NN), (develop, VB), (graduate, JJ), (degree, NN), (include, VBP), (offer, NN), (doctorate, NN), (phd, NN), (degree, NN), (today, NN), (college, NN), (offer, NN), (graduate, JJ), (education, NN), (department, NN), (college, NN), (art, VBP), (letter, NN), (offer, NN), (phd, NN), (program, NN), (professional, JJ), (master, NN), (divinity, NN), (mdiv, JJ), (program, NN), (exist, VBP), (department, NN), (college, NN), (science, NN), (offer, NN), (phd, JJ), (program, NN), (department, NN), (preprofessional, NN), (study, NN), (school, NN), (architecture, NN), (offer, VBP), (master, NN), (architecture, NN), (department, NN), (college, NN), (engineering, NN), (offer, NN), (phd, JJ), (program, NN), (college, NN), (business, NN), (offer, VBP), (multiple, JJ), (professional, JJ), (program, NN), (include, VBP), (mba, JJ), (master, NN), (science, NN), (accountancy, NN), (program, NN), (operate, VBP), (facility, NN), (chicago, NN), (cincinnati, NN), (executive, NN), (mba, NN), ...] | [(year, NN), (master, RBR), (art, NN), (course, NN), (offer, NN), (notre, JJ), (dame, NN)] | [(1854, CD)] | [(university, NN, O), (offer, NN, O), (graduate, NN, O), (degree, JJ, O), (form, NN, O), (master, NN, O), (arts, NNS, O), (ma, VBD, O), (18541855, CD, O), (academic, JJ, O), (year, NN, O), (program, NN, O), (expand, VBP, O), (include, VBP, O), (master, NN, O), (law, NN, O), (llm, NN, O), (master, NN, O), (civil, JJ, O), (engineering, NN, O), (early, JJ, O), (stage, NN, O), (growth, NN, O), (formal, JJ, O), (graduate, NN, O), (school, NN, O), (education, NN, O), (develop, VB, O), (thesis, NN, O), (require, NN, O), (receive, JJ, O), (degree, NN, O), (change, NN, O), (1924, CD, O), (formal, JJ, O), (requirement, NN, O), (develop, VB, O), (graduate, JJ, O), (degree, NN, O), (include, VBP, O), (offer, NN, O), (doctorate, NN, O), (phd, NN, O), (degree, NN, O), (today, NN, O), (college, NN, O), (offer, NN, O), (graduate, JJ, O), (education, NN, O), (department, NN, O), (college, NN, O), (art, VBP, O), (letter, NN, O), (offer, NN, O), (phd, NN, O), (program, NN, O), (professional, JJ, O), (master, NN, O), (divinity, NN, O), (mdiv, JJ, O), (program, NN, O), (exist, VBP, O), (department, NN, O), (college, NN, O), (science, NN, O), (offer, NN, O), (phd, JJ, O), (program, NN, O), (department, NN, O), (preprofessional, NN, O), (study, NN, O), (school, NN, O), (architecture, NN, O), (offer, VBP, O), (master, NN, O), (architecture, NN, O), (department, NN, O), (college, NN, O), (engineering, NN, O), (offer, NN, O), (phd, JJ, O), (program, NN, O), (college, NN, O), (business, NN, O), (offer, VBP, O), (multiple, JJ, O), (professional, JJ, O), (program, NN, O), (include, VBP, O), (mba, JJ, O), (master, NN, O), (science, NN, O), (accountancy, NN, O), (program, NN, O), (operate, VBP, O), (facility, NN, O), (chicago, NN, O), (cincinnati, NN, O), (executive, NN, O), (mba, NN, O), ...] | [(year, NN, O), (master, RBR, O), (art, NN, O), (course, NN, O), (offer, NN, O), (notre, JJ, O), (dame, NN, O)] | [(1854, CD, O)] |
| 28 | university offer graduate degree form master arts ma 18541855 academic year program expand include master law llm master civil engineering early stage growth formal graduate school education develop thesis require receive degree change 1924 formal requirement develop graduate degree include offer doctorate phd degree today college offer graduate education department college art letter offer phd program professional master divinity mdiv program exist department college science offer phd program department preprofessional study school architecture offer master architecture department college engineering offer phd program college business offer multiple professional program include mba master science accountancy program operate facility chicago cincinnati executive mba program additionally alliance catholic education program offer master education program student study university summer teach catholic elementary schools middle school high school southern united states school year | department notre dame offer phd program | department preprofessional study | [university, offer, graduate, degree, form, master, arts, ma, 18541855, academic, year, program, expand, include, master, law, llm, master, civil, engineering, early, stage, growth, formal, graduate, school, education, develop, thesis, require, receive, degree, change, 1924, formal, requirement, develop, graduate, degree, include, offer, doctorate, phd, degree, today, college, offer, graduate, education, department, college, art, letter, offer, phd, program, professional, master, divinity, mdiv, program, exist, department, college, science, offer, phd, program, department, preprofessional, study, school, architecture, offer, master, architecture, department, college, engineering, offer, phd, program, college, business, offer, multiple, professional, program, include, mba, master, science, accountancy, program, operate, facility, chicago, cincinnati, executive, mba, ...] | [department, notre, dame, offer, phd, program] | [department, preprofessional, study] | [(university, NN), (offer, NN), (graduate, NN), (degree, JJ), (form, NN), (master, NN), (arts, NNS), (ma, VBD), (18541855, CD), (academic, JJ), (year, NN), (program, NN), (expand, VBP), (include, VBP), (master, NN), (law, NN), (llm, NN), (master, NN), (civil, JJ), (engineering, NN), (early, JJ), (stage, NN), (growth, NN), (formal, JJ), (graduate, NN), (school, NN), (education, NN), (develop, VB), (thesis, NN), (require, NN), (receive, JJ), (degree, NN), (change, NN), (1924, CD), (formal, JJ), (requirement, NN), (develop, VB), (graduate, JJ), (degree, NN), (include, VBP), (offer, NN), (doctorate, NN), (phd, NN), (degree, NN), (today, NN), (college, NN), (offer, NN), (graduate, JJ), (education, NN), (department, NN), (college, NN), (art, VBP), (letter, NN), (offer, NN), (phd, NN), (program, NN), (professional, JJ), (master, NN), (divinity, NN), (mdiv, JJ), (program, NN), (exist, VBP), (department, NN), (college, NN), (science, NN), (offer, NN), (phd, JJ), (program, NN), (department, NN), (preprofessional, NN), (study, NN), (school, NN), (architecture, NN), (offer, VBP), (master, NN), (architecture, NN), (department, NN), (college, NN), (engineering, NN), (offer, NN), (phd, JJ), (program, NN), (college, NN), (business, NN), (offer, VBP), (multiple, JJ), (professional, JJ), (program, NN), (include, VBP), (mba, JJ), (master, NN), (science, NN), (accountancy, NN), (program, NN), (operate, VBP), (facility, NN), (chicago, NN), (cincinnati, NN), (executive, NN), (mba, NN), ...] | [(department, NN), (notre, NN), (dame, NN), (offer, VBP), (phd, JJ), (program, NN)] | [(department, NN), (preprofessional, NN), (study, NN)] | [(university, NN, O), (offer, NN, O), (graduate, NN, O), (degree, JJ, O), (form, NN, O), (master, NN, O), (arts, NNS, O), (ma, VBD, O), (18541855, CD, O), (academic, JJ, O), (year, NN, O), (program, NN, O), (expand, VBP, O), (include, VBP, O), (master, NN, O), (law, NN, O), (llm, NN, O), (master, NN, O), (civil, JJ, O), (engineering, NN, O), (early, JJ, O), (stage, NN, O), (growth, NN, O), (formal, JJ, O), (graduate, NN, O), (school, NN, O), (education, NN, O), (develop, VB, O), (thesis, NN, O), (require, NN, O), (receive, JJ, O), (degree, NN, O), (change, NN, O), (1924, CD, O), (formal, JJ, O), (requirement, NN, O), (develop, VB, O), (graduate, JJ, O), (degree, NN, O), (include, VBP, O), (offer, NN, O), (doctorate, NN, O), (phd, NN, O), (degree, NN, O), (today, NN, O), (college, NN, O), (offer, NN, O), (graduate, JJ, O), (education, NN, O), (department, NN, O), (college, NN, O), (art, VBP, O), (letter, NN, O), (offer, NN, O), (phd, NN, O), (program, NN, O), (professional, JJ, O), (master, NN, O), (divinity, NN, O), (mdiv, JJ, O), (program, NN, O), (exist, VBP, O), (department, NN, O), (college, NN, O), (science, NN, O), (offer, NN, O), (phd, JJ, O), (program, NN, O), (department, NN, O), (preprofessional, NN, O), (study, NN, O), (school, NN, O), (architecture, NN, O), (offer, VBP, O), (master, NN, O), (architecture, NN, O), (department, NN, O), (college, NN, O), (engineering, NN, O), (offer, NN, O), (phd, JJ, O), (program, NN, O), (college, NN, O), (business, NN, O), (offer, VBP, O), (multiple, JJ, O), (professional, JJ, O), (program, NN, O), (include, VBP, O), (mba, JJ, O), (master, NN, O), (science, NN, O), (accountancy, NN, O), (program, NN, O), (operate, VBP, O), (facility, NN, O), (chicago, NN, O), (cincinnati, NN, O), (executive, NN, O), (mba, NN, O), ...] | [(department, NN, O), (notre, NN, O), (dame, NN, O), (offer, VBP, O), (phd, JJ, O), (program, NN, O)] | [(department, NN, O), (preprofessional, NN, O), (study, NN, O)] |
| 29 | joan b kroc institute international peace study university notre dame dedicate research education outreach cause violent conflict condition sustainable peace offer phd master undergraduate degree peace study found 1986 donation joan b kroc widow mcdonalds owner ray kroc institute inspire vision rev theodore m hesburgh csc president emeritus university notre dame institute contribute international policy discussion peace building practice | institute notre dame study reason violent conflict | joan b kroc institute international peace study | [joan, b, kroc, institute, international, peace, study, university, notre, dame, dedicate, research, education, outreach, cause, violent, conflict, condition, sustainable, peace, offer, phd, master, undergraduate, degree, peace, study, found, 1986, donation, joan, b, kroc, widow, mcdonalds, owner, ray, kroc, institute, inspire, vision, rev, theodore, m, hesburgh, csc, president, emeritus, university, notre, dame, institute, contribute, international, policy, discussion, peace, building, practice] | [institute, notre, dame, study, reason, violent, conflict] | [joan, b, kroc, institute, international, peace, study] | [(joan, NN), (b, NN), (kroc, NNP), (institute, NN), (international, JJ), (peace, NN), (study, NN), (university, NN), (notre, RB), (dame, JJ), (dedicate, NN), (research, NN), (education, NN), (outreach, NN), (cause, NN), (violent, JJ), (conflict, NN), (condition, NN), (sustainable, JJ), (peace, NN), (offer, NN), (phd, JJ), (master, NN), (undergraduate, JJ), (degree, NN), (peace, NN), (study, NN), (found, VBD), (1986, CD), (donation, NN), (joan, NN), (b, NN), (kroc, NN), (widow, NN), (mcdonalds, NNS), (owner, NN), (ray, NN), (kroc, NNP), (institute, NN), (inspire, NN), (vision, NN), (rev, NN), (theodore, IN), (m, JJ), (hesburgh, NN), (csc, NN), (president, NN), (emeritus, NN), (university, NN), (notre, NN), (dame, NN), (institute, NN), (contribute, VBP), (international, JJ), (policy, NN), (discussion, NN), (peace, NN), (building, NN), (practice, NN)] | [(institute, NN), (notre, NN), (dame, NN), (study, NN), (reason, NN), (violent, JJ), (conflict, NN)] | [(joan, NN), (b, NN), (kroc, NNP), (institute, NN), (international, JJ), (peace, NN), (study, NN)] | [(joan, NN, O), (b, NN, O), (kroc, NNP, O), (institute, NN, O), (international, JJ, O), (peace, NN, O), (study, NN, O), (university, NN, O), (notre, RB, O), (dame, JJ, O), (dedicate, NN, O), (research, NN, O), (education, NN, O), (outreach, NN, O), (cause, NN, O), (violent, JJ, O), (conflict, NN, O), (condition, NN, O), (sustainable, JJ, O), (peace, NN, O), (offer, NN, O), (phd, JJ, O), (master, NN, O), (undergraduate, JJ, O), (degree, NN, O), (peace, NN, O), (study, NN, O), (found, VBD, O), (1986, CD, O), (donation, NN, O), (joan, NN, O), (b, NN, O), (kroc, NN, O), (widow, NN, O), (mcdonalds, NNS, O), (owner, NN, O), (ray, NN, O), (kroc, NNP, O), (institute, NN, O), (inspire, NN, O), (vision, NN, O), (rev, NN, O), (theodore, IN, O), (m, JJ, O), (hesburgh, NN, O), (csc, NN, O), (president, NN, O), (emeritus, NN, O), (university, NN, O), (notre, NN, O), (dame, NN, O), (institute, NN, O), (contribute, VBP, O), (international, JJ, O), (policy, NN, O), (discussion, NN, O), (peace, NN, O), (building, NN, O), (practice, NN, O)] | [(institute, NN, O), (notre, NN, O), (dame, NN, O), (study, NN, O), (reason, NN, O), (violent, JJ, O), (conflict, NN, O)] | [(joan, NN, O), (b, NN, O), (kroc, NNP, O), (institute, NN, O), (international, JJ, O), (peace, NN, O), (study, NN, O)] |
If you look at the first 10 rows of the results printed above after POS tagging and NER, the tags are printed in terms of acronyms. Look at the below table to recognize those tags in human understandable manner.
POS Tags:
| Tag | Description |
|---|---|
| CC | Coordinating conjunction |
| CD | Cardinal number |
| DT | Determiner |
| EX | Existential there |
| FW | Foreign word |
| IN | Preposition or subordinating conjunction |
| JJ | Adjective |
| JJR | Adjective, comparative |
| JJS | Adjective, superlative |
| LS | List item marker |
| MD | Modal verb |
| NN | Noun, singular or mass |
| NNS | Noun, plural |
| NNP | Proper noun, singular |
| NNPS | Proper noun, plural |
| PDT | Predeterminer |
| POS | Possessive ending |
| PRP | Personal pronoun |
PRP$ |
Possessive pronoun |
| RB | Adverb |
| RBR | Adverb, comparative |
| RBS | Adverb, superlative |
| RP | Particle |
| TO | "to" as a preposition or infinitive marker |
| UH | Interjection |
| VB | Verb, base form |
| VBD | Verb, past tense |
| VBG | Verb, gerund or present participle |
| VBN | Verb, past participle |
| VBP | Verb, non-3rd person singular present |
| VBZ | Verb, 3rd person singular present |
| WDT | Wh-determiner |
| WP | Wh-pronoun |
WP$ |
Possessive wh-pronoun |
| WRB | Wh-adverb |
NER Tags:
| Tag | Description |
|---|---|
| O | Other (not a named entity) |
| B-PER | Beginning of a person's name |
| I-PER | Inside a person's name |
| B-ORG | Beginning of an organization |
| I-ORG | Inside an organization name |
| B-LOC | Beginning of a location |
| I-LOC | Inside a location name |
| B-MISC | Beginning of a miscellaneous entity |
| I-MISC | Inside a miscellaneous entity name |
| B-DATE | Beginning of a date expression |
| I-DATE | Inside a date expression |
| B-TIME | Beginning of a time expression |
| I-TIME | Inside a time expression |
| B-MONEY | Beginning of a monetary value |
| I-MONEY | Inside a monetary value |
| B-PERCENT | Beginning of a percentage expression |
| I-PERCENT | Inside a percentage expression |
| B-QUANTITY | Beginning of a quantity expression |
| I-QUANTITY | Inside a quantity expression |
| B-ART | Beginning of a work of art (e.g., books, movies) |
| I-ART | Inside a work of art |
| B-LANGUAGE | Beginning of a language name |
| I-LANGUAGE | Inside a language name |
| B-DEVICE | Beginning of a device name |
| I-DEVICE | Inside a device name |
| DATE | Specific dates or periods of time |
| CARDINAL | Numerical values indicating quantity |
| GPE | Countries, cities, and states |
| QUANTITY | Specific quantities, often related to measurements |
| ORG | Companies, agencies, institutions, and organizations |
| FAC | Buildings, airports, highways, bridges, and structures |
| NORP | Nationalities, ethnic groups, and religious/political affiliations |
If you look at the above resultant table, we can see the POS and NER tags tagged to each token which is fine.
But, the visualization is not properly understandable.
Hence, the spacy library provides a visualizer tool named displacy which will help in displaying and understanding NLP results, such as dependency parses and named entities. It's designed to help users easily visualize the linguistic annotations generated by SpaCy's models.
If you use displacy tool and visualize the Named Entities, it highlights named entities within text, categorizing them into types (like *PERSON, ORGANIZATION, LOCATION*, etc.) and providing an intuitive visual representation. This helps in quickly identifying and understanding the entities present in the text.
Look at the below code how displacy renders the tokens along with their Named Entities in Jupyter notebook.
If you see the below result, all the Named Entities are displayed in color-coded format for better understanding purpose.
context¶# Process and visualize Named Entities for the first 30 rows
for index in range(0, 30): # For the first 30 rows
context = data['context'].iloc[index] # Access the row value correctly
# Ensure context is a string
if isinstance(context, str):
# Process the text with SpaCy
doc = nlp(context)
# Render in Jupyter
displacy.render(doc, style='ent', jupyter=True)
else:
print(f"Row {index} does not contain a valid string for NER processing.")
question¶# Process and visualize Named Entities for the first 30 rows
for index in range(0, 30): # For the first 30 rows
question = data['question'].iloc[index] # Access the row value correctly
# Ensure question is a string
if isinstance(question, str):
# Process the text with SpaCy
doc = nlp(question)
# Render in Jupyter
displacy.render(doc, style='ent', jupyter=True)
else:
print(f"Row {index} does not contain a valid string for NER processing.")
answer¶# Process and visualize Named Entities for the first 30 rows
for index in range(0, 30): # For the first 30 rows
answer = data['answer'].iloc[index] # Access the row value correctly
# Ensure answer is a string
if isinstance(answer, str):
# Process the text with SpaCy
doc = nlp(answer)
# Render in Jupyter
displacy.render(doc, style='ent', jupyter=True)
else:
print(f"Row {index} does not contain a valid string for NER processing.")
When you use the displacy tool to visualize dependency parsing, it provides a clear representation of the grammatical structure of a sentence.
Each word in the sentence is displayed along with arrows that indicate how the words are connected to each other based on their grammatical relationships. This visualization helps in understanding the syntactic dependencies between words, making it easier to analyze sentence structure.
Dependency parsing analyzes the grammatical structure of a sentence to establish relationships between words, showing how they depend on one another.
Here are the steps of Understanding the Visualization:
In a dependency parse tree, each word is a node, and the edges (connections) indicate the relationships. Here is how to interpret it:
Roots: Typically, the main verb of the sentence is the root of the tree.
Branches: Each word connected to the root or other words represents a specific grammatical relationship (using the labels above).
Hierarchy: The structure shows which words are dependent on others, giving insight into the sentence’s grammatical structure.
Output: The result is a tree structure or graph that represents the dependencies among words, along with labels indicating the type of relationship (like nsubj, dobj, amod).
If you see the below output, you will notice some dependency labels. Here is the explanation of them.
| Dependency Label | Full Form | Meaning |
|---|---|---|
| nsubj | Nominal Subject | The noun phrase that acts as the subject of a verb. |
| nmod | Noun Modifier | A noun that modifies another noun. |
| amod | Adjectival Modifier | An adjective that modifies a noun. |
| advmod | Adverbial Modifier | An adverb that modifies a verb, adjective, or another adverb. |
| compound | Compound Modifier | A word that modifies another word and is part of a multi-word expression (e.g., "toothbrush"). |
| dep | Dependent | A generic label for dependencies that do not fit other categories. |
| advcl | Adverbial Clause | A dependent clause that acts as an adverb. |
| dobj | Direct Object | The noun phrase that receives the action of a transitive verb. |
| conj | Conjunction | A word that connects clauses or sentences (e.g., "and", "but"). |
| ROOT | Root | The main verb or the root of the syntactic structure. |
| punct | Punctuation | Marks such as commas, periods, or other punctuation in the sentence. |
| ccomp | Clausal Complement | A clause that acts as a complement to a verb. |
| xcomp | Open Clausal Complement | A dependent clause that is a complement of a verb but has its own subject. |
context column¶# Process and visualize Named Entities for the first 30 rows
for index in range(0, 30): # For the first 30 rows
context = data['context'].iloc[index] # Access the row value correctly
# Ensure context is a string
if isinstance(context, str):
# Process the text with SpaCy
doc = nlp(context)
options = {
"bg": "#E1F5FE",
"color": "#2E7D32",
"width": 600
}
# Render in Jupyter
displacy.render(doc, style='dep', options=options, jupyter=True)
else:
print(f"Row {index} does not contain a valid string for NER processing.")
question column¶# Process and visualize Named Entities for the first 30 rows
for index in range(0, 30): # For the first 30 rows
question = data['question'].iloc[index] # Access the row value correctly
# Ensure question is a string
if isinstance(question, str):
# Process the text with SpaCy
doc = nlp(question)
options = {
"bg": "#E1F5FE",
"color": "#2E7D32",
"width": 600
}
# Render in Jupyter
displacy.render(doc, style='dep', options=options, jupyter=True)
else:
print(f"Row {index} does not contain a valid string for NER processing.")
answer column¶# Process and visualize Named Entities for the first 30 rows
for index in range(0, 30): # For the first 30 rows
answer = data['answer'].iloc[index] # Access the row value correctly
# Ensure answer is a string
if isinstance(answer, str):
# Process the text with SpaCy
doc = nlp(answer)
options = {
"bg": "#E1F5FE",
"color": "#2E7D32",
"width": 600
}
# Render in Jupyter
displacy.render(doc, style='dep', options=options, jupyter=True)
else:
print(f"Row {index} does not contain a valid string for NER processing.")
To visualize the differences between the original text and the preprocessed text of the following, we can use several approaches. The goal of these visualizations is to understand how the preprocessing steps have transformed your text data.
merged_squad_df['context'] and data['context']merged_squad_df['question'] and data['question']merged_squad_df['answer'] and data['answer']Here are a few types of visualizations that might be useful:
context column (before preprocessing) and context column (after preprocessing).¶# Generate word cloud for original text
text_original = ' '.join(merged_squad_df['context'].astype(str))
wordcloud_original = WordCloud(width=800, height=400, background_color='black').generate(text_original)
# Generate word cloud for cleaned text
text_cleaned = ' '.join(data['context'].astype(str))
wordcloud_cleaned = WordCloud(width=800, height=400, background_color='black').generate(text_cleaned)
# Plot word clouds
plt.figure(figsize=(14, 7))
plt.subplot(1, 2, 1)
plt.imshow(wordcloud_original, interpolation='bilinear')
plt.title('Original Text')
plt.axis('off')
plt.subplot(1, 2, 2)
plt.imshow(wordcloud_cleaned, interpolation='bilinear')
plt.title('Cleaned Text')
plt.axis('off')
plt.show()
question column (before preprocessing) and question column (after preprocessing).¶# Generate word cloud for original text
text_original = ' '.join(merged_squad_df['question'].astype(str))
wordcloud_original = WordCloud(width=800, height=400, background_color='black').generate(text_original)
# Generate word cloud for cleaned text
text_cleaned = ' '.join(data['question'].astype(str))
wordcloud_cleaned = WordCloud(width=800, height=400, background_color='black').generate(text_cleaned)
# Plot word clouds
plt.figure(figsize=(14, 7))
plt.subplot(1, 2, 1)
plt.imshow(wordcloud_original, interpolation='bilinear')
plt.title('Original Text')
plt.axis('off')
plt.subplot(1, 2, 2)
plt.imshow(wordcloud_cleaned, interpolation='bilinear')
plt.title('Cleaned Text')
plt.axis('off')
plt.show()
answer column (before preprocessing) and answer column (after preprocessing).¶# Generate word cloud for original text
text_original = ' '.join(merged_squad_df['answer'].astype(str))
wordcloud_original = WordCloud(width=800, height=400, background_color='black').generate(text_original)
# Generate word cloud for cleaned text
text_cleaned = ' '.join(data['answer'].astype(str))
wordcloud_cleaned = WordCloud(width=800, height=400, background_color='black').generate(text_cleaned)
# Plot word clouds
plt.figure(figsize=(14, 7))
plt.subplot(1, 2, 1)
plt.imshow(wordcloud_original, interpolation='bilinear')
plt.title('Original Text')
plt.axis('off')
plt.subplot(1, 2, 2)
plt.imshow(wordcloud_cleaned, interpolation='bilinear')
plt.title('Cleaned Text')
plt.axis('off')
plt.show()
data['context'] after preprocessing the text data.¶# Combine all text into a single string
all_text = ' '.join(data['context'])
# Create a TextBlob object
blob = TextBlob(all_text)
# Get the words (list of words in the TextBlob)
words = blob.words
# Count the words
word_counts = Counter(words)
# Get the top 50 words
top_words = word_counts.most_common(50)
words, counts = zip(*top_words)
# Plot the Area plot
plt.figure(figsize=(12, 6))
plt.fill_between(words, counts, color='orange', alpha=0.5)
plt.title('Area Plot of Top Words in context column')
plt.xlabel('Words')
plt.ylabel('Frequency')
plt.xticks(rotation=90)
plt.grid(True)
plt.show()
data['question'] after preprocessing the text data.¶# Combine all text into a single string
all_text = ' '.join(data['question'])
# Create a TextBlob object
blob = TextBlob(all_text)
# Get the words (list of words in the TextBlob)
words = blob.words
# Count the words
word_counts = Counter(words)
# Get the top 50 words
top_words = word_counts.most_common(50)
words, counts = zip(*top_words)
# Plot the Area plot
plt.figure(figsize=(12, 6))
plt.fill_between(words, counts, color='green', alpha=0.5)
plt.title('Area Plot of Top Words in question column')
plt.xlabel('Words')
plt.ylabel('Frequency')
plt.xticks(rotation=90)
plt.grid(True)
plt.show()
data['answer'] after preprocessing the text data.¶# Combine all text into a single string
all_text = ' '.join(data['answer'])
# Create a TextBlob object
blob = TextBlob(all_text)
# Get the words (list of words in the TextBlob)
words = blob.words
# Count the words
word_counts = Counter(words)
# Get the top 50 words
top_words = word_counts.most_common(50)
words, counts = zip(*top_words)
# Plot the Area plot
plt.figure(figsize=(12, 6))
plt.fill_between(words, counts, color='blue', alpha=0.5)
plt.title('Area Plot of Top Words in answer column')
plt.xlabel('Words')
plt.ylabel('Frequency')
plt.xticks(rotation=90)
plt.grid(True)
plt.show()
data.to_csv("squad_processed.csv", index=False)
I have initially downloaded and loaded en_core_web_lg model. It has more number of parameters and these are actually sufficient for NER and POS. This can handle heavy number of records in the dataset.
All the text preprocessing was performed using SpaCy library.
Justification: While NLTK is highly flexible and offers a wide range of tools for text analysis and education, it can require more boilerplate code and manual steps for preprocessing tasks. SpaCy, on the other hand, is streamlined for practical applications, making it a favorite for many developers and researchers looking for efficiency and ease of use in text preprocessing.
As part of text preprocessing, we have the followed the below order:
Note: I could have tokenized the text at first place itself. But because of the above mentioned HTML tags, URLs, email IDs etc, there is a high chance that noise is present and then it will also be tokenized. Hence, tokenization was performed after all the above points.
As part of "Visualization of obtained Named Entities Results" section, very few sentences have not recognized Named Entities.
However, this model has recognized almost most of the sentences properly.
This recognition is actually done to understand the relation, context etc between each each word of the sentence.
I have plotted the word clouds for both raw and preprocessed texts for all the the following columns separately. This way, I have understood that most repeated words are displayed in larger size followed by the least repeated words displayed in smaller size.
I have also plotted the frequecies of POS tags of each sentence.
Finally, I have also plotted the top 50 words of the prepreocessed text using an Area plot.